"""Util that calls you.com Search API.In order to set this up, follow instructions at:https://documentation.you.com/quickstart"""importwarningsfromtypingimportAny,Dict,List,Literal,Optionalimportaiohttpimportrequestsfromlangchain_core.documentsimportDocumentfromlangchain_core.utilsimportget_from_dict_or_envfrompydanticimportBaseModel,Field,model_validatorfromtyping_extensionsimportSelfYOU_API_URL="https://api.ydc-index.io"
class YouHitMetadata(BaseModel):
    """Descriptive fields attached to a single you.com search hit."""

    title: str = Field(
        description="The title of the result",
    )
    url: str = Field(
        description="The url of the result",
    )
    thumbnail_url: str = Field(
        description="Thumbnail associated with the result",
    )
    description: str = Field(
        description="Details about the result",
    )
class YouHit(YouHitMetadata):
    """One you.com hit: the hit metadata plus its list of text snippets."""

    snippets: List[str] = Field(
        description="One or snippets of text",
    )
class YouAPIOutput(BaseModel):
    """Top-level shape of a you.com API response: a list of hits."""

    hits: List[YouHit] = Field(
        description="A list of dictionaries containing the results",
    )
class YouDocument(BaseModel):
    """Result of parsing one snippet: its text plus the owning hit's metadata."""

    page_content: str = Field(
        description="One snippet of text",
    )
    metadata: YouHitMetadata
class YouSearchAPIWrapper(BaseModel):
    """Wrapper for you.com Search and News API.

    To connect to the You.com api requires an API key which
    you can get at https://api.you.com.
    You can check out the docs at
    https://documentation.you.com/api-reference/.

    You need to set the environment variable `YDC_API_KEY` (or pass
    `ydc_api_key`) for the wrapper to operate.

    Attributes
    ----------
    ydc_api_key: str, optional
        you.com api key, if YDC_API_KEY is not set in the environment
    endpoint_type: str, optional
        you.com endpoints: search, news, rag;
        `snippet` is a deprecated alias of `search`
        `rag` returns `{'message': 'Forbidden'}`
    num_web_results: int, optional
        The max number of web results to return, must be under 20.
        This is mapped to the `count` query parameter for the News API.
    safesearch: str, optional
        Safesearch settings, one of off, moderate, strict,
        defaults to moderate
    country: str, optional
        Country code, ex: 'US' for United States, see api docs for list
    search_lang: str, optional
        (News API) Language codes, ex: 'en' for English,
        see api docs for list
    ui_lang: str, optional
        (News API) User interface language for the response,
        ex: 'en' for English, see api docs for list
    spellcheck: bool, optional
        (News API) Whether to spell check query or not, defaults to True
    k: int, optional
        max number of Documents to return using `results()`
    n_hits: int, optional, deprecated
        Alias for num_web_results; only used when `num_web_results`
        is not set
    n_snippets_per_hit: int, optional
        limit the number of snippets returned per hit
    """

    ydc_api_key: Optional[str] = None

    # @todo deprecate `snippet`, not part of API
    endpoint_type: Literal["search", "news", "rag", "snippet"] = "search"

    # Common fields between Search and News API
    num_web_results: Optional[int] = None
    safesearch: Optional[Literal["off", "moderate", "strict"]] = None
    country: Optional[str] = None

    # News API specific fields
    search_lang: Optional[str] = None
    ui_lang: Optional[str] = None
    spellcheck: Optional[bool] = None

    k: Optional[int] = None
    n_snippets_per_hit: Optional[int] = None
    # should deprecate n_hits
    n_hits: Optional[int] = None

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Resolve the API key from the constructor values or the environment.

        Args:
            values: Raw constructor values, before field validation.

        Returns:
            The same mapping with ``ydc_api_key`` filled in.
        """
        # NOTE(review): `get_from_dict_or_env` presumably raises when the key
        # is in neither place -- confirm against langchain_core.
        values["ydc_api_key"] = get_from_dict_or_env(
            values, "ydc_api_key", "YDC_API_KEY"
        )
        return values

    @model_validator(mode="after")
    def warn_if_set_fields_have_no_effect(self) -> Self:
        """Warn about fields that are set but ignored by the chosen endpoint.

        Returns:
            The validated model, unchanged.
        """
        if self.endpoint_type != "news":
            for field_name in ("search_lang", "ui_lang", "spellcheck"):
                # Compare against None rather than truthiness so that an
                # explicit `spellcheck=False` is still reported as ignored.
                if getattr(self, field_name) is not None:
                    warnings.warn(
                        (
                            f"News API-specific field '{field_name}' is set but "
                            f'`endpoint_type="{self.endpoint_type}"`. '
                            "This will have no effect."
                        ),
                        UserWarning,
                    )

        # Truthiness is intentional here: `_parse_results` also treats a
        # falsy `n_snippets_per_hit` as "no limit".
        if (
            self.endpoint_type not in ("search", "snippet")
            and self.n_snippets_per_hit
        ):
            warnings.warn(
                (
                    "Field 'n_snippets_per_hit' only has effect on "
                    '`endpoint_type="search"`.'
                ),
                UserWarning,
            )
        return self

    @model_validator(mode="after")
    def warn_if_deprecated_endpoints_are_used(self) -> Self:
        """Emit a DeprecationWarning when the `snippet` endpoint alias is used.

        Returns:
            The validated model, unchanged.
        """
        # The accepted literal is "snippet" (singular); comparing against
        # "snippets" could never match, so the warning was never emitted.
        if self.endpoint_type == "snippet":
            warnings.warn(
                (
                    f'`endpoint_type="{self.endpoint_type}"` is deprecated. '
                    'Use `endpoint_type="search"` instead.'
                ),
                DeprecationWarning,
            )
        return self

    def _api_endpoint(self) -> str:
        """Return the URL path segment for the configured endpoint type."""
        # `snippet` is not part of the API; it is served by `search`.
        # Resolved here instead of overwriting `self.endpoint_type`, so that
        # running a query does not mutate the model's configuration.
        if self.endpoint_type == "snippet":
            return "search"
        return self.endpoint_type

    def _generate_params(self, query: str, **kwargs: Any) -> Dict:
        """Build the query parameters for the configured endpoint.

        Args:
            query: The query to search for.
            **kwargs: Extra query parameters; endpoint-specific parameters
                set below take precedence over same-named entries.

        Returns:
            The parameter dict with every ``None``-valued entry removed.
        """
        params = {
            "safesearch": self.safesearch,
            "country": self.country,
            **kwargs,
        }

        # `n_hits` is the deprecated alias of `num_web_results`: use it only
        # when the primary field is not set.
        num_web_results = (
            self.num_web_results if self.num_web_results is not None else self.n_hits
        )

        # Add endpoint-specific params
        if self.endpoint_type in ("search", "snippet"):
            params.update(
                query=query,
                num_web_results=num_web_results,
            )
        elif self.endpoint_type == "news":
            params.update(
                q=query,
                count=num_web_results,
                search_lang=self.search_lang,
                ui_lang=self.ui_lang,
                spellcheck=self.spellcheck,
            )

        return {key: value for key, value in params.items() if value is not None}

    def _parse_results(self, raw_search_results: Dict) -> List[Document]:
        """Convert a raw API response into a list of Documents.

        For the news endpoint, each entry of ``["news"]["results"]`` becomes
        one Document whose content is the entry's ``description``. Otherwise
        each snippet of each entry in ``["hits"]`` becomes one Document.

        Args:
            raw_search_results: The decoded JSON response.

        Returns:
            The parsed Documents, at most ``k`` of them when ``k`` is set.
        """
        # return news results
        if self.endpoint_type == "news":
            news_results = raw_search_results["news"]["results"]
            if self.k is not None:
                news_results = news_results[: self.k]
            return [
                Document(page_content=result["description"], metadata=result)
                for result in news_results
            ]

        docs: List[Document] = []
        for hit in raw_search_results["hits"]:
            # A hit without a "snippets" entry contributes no documents
            # instead of raising a TypeError on len(None).
            snippets = hit.get("snippets") or []
            if self.n_snippets_per_hit:
                snippets = snippets[: self.n_snippets_per_hit]

            for snippet in snippets:
                # Check the limit before appending so that k=0 yields an
                # empty list, matching the news branch above.
                if self.k is not None and len(docs) >= self.k:
                    return docs
                docs.append(
                    Document(
                        page_content=snippet,
                        metadata={
                            "url": hit.get("url"),
                            "thumbnail_url": hit.get("thumbnail_url"),
                            "title": hit.get("title"),
                            "description": hit.get("description"),
                        },
                    )
                )
        return docs

    def raw_results(
        self,
        query: str,
        **kwargs: Any,
    ) -> Dict:
        """Run query through you.com Search and return the raw response.

        Args:
            query: The query to search for.
            **kwargs: Extra query parameters forwarded to the API.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the response status indicates an error
                (via ``raise_for_status``).
        """
        headers = {"X-API-Key": self.ydc_api_key or ""}
        params = self._generate_params(query, **kwargs)

        response = requests.get(  # type: ignore
            f"{YOU_API_URL}/{self._api_endpoint()}",
            params=params,
            headers=headers,
        )
        response.raise_for_status()
        return response.json()

    def results(
        self,
        query: str,
        **kwargs: Any,
    ) -> List[Document]:
        """Run query through you.com Search and parse results into Documents.

        Args:
            query: The query to search for.
            **kwargs: Extra query parameters; ``None`` values are dropped
                before the request is built.

        Returns:
            The Documents parsed from the response.
        """
        raw_search_results = self.raw_results(
            query,
            **{key: value for key, value in kwargs.items() if value is not None},
        )
        return self._parse_results(raw_search_results)

    async def raw_results_async(
        self,
        query: str,
        **kwargs: Any,
    ) -> Dict:
        """Get results from the you.com Search API asynchronously.

        Args:
            query: The query to search for.
            **kwargs: Extra query parameters forwarded to the API.

        Returns:
            The decoded JSON body of the response.

        Raises:
            Exception: If the response status is anything other than 200.
        """
        headers = {"X-API-Key": self.ydc_api_key or ""}
        params = self._generate_params(query, **kwargs)

        async with aiohttp.ClientSession() as session:
            async with session.get(
                url=f"{YOU_API_URL}/{self._api_endpoint()}",
                params=params,
                headers=headers,
            ) as res:
                if res.status != 200:
                    raise Exception(f"Error {res.status}: {res.reason}")
                return await res.json()