# Source code for langchain_community.utilities.metaphor_search

"""调用隐喻搜索API的工具。

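To set up this tool, supply a Metaphor API key either as the
``metaphor_api_key`` argument or through the ``METAPHOR_API_KEY``
environment variable.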
"""
import json
from typing import Dict, List, Optional

import aiohttp
import requests
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
from langchain_core.utils import get_from_dict_or_env

METAPHOR_API_URL = "https://api.metaphor.systems"


class MetaphorSearchAPIWrapper(BaseModel):
    """Wrapper for the Metaphor Search API.

    The API key is taken from the ``metaphor_api_key`` constructor argument
    or, failing that, from the ``METAPHOR_API_KEY`` environment variable.
    """

    metaphor_api_key: str
    k: int = 10

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is present in the arguments or environment."""
        values["metaphor_api_key"] = get_from_dict_or_env(
            values, "metaphor_api_key", "METAPHOR_API_KEY"
        )
        return values

    def _request_headers(self) -> Dict[str, str]:
        """Return the HTTP headers that authenticate a search request."""
        return {"X-Api-Key": self.metaphor_api_key}

    def _build_search_payload(
        self,
        query: str,
        num_results: int,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: Optional[bool] = None,
    ) -> Dict:
        """Map the Python keyword arguments onto the JSON body of ``/search``.

        Shared by the synchronous and asynchronous code paths so both send
        exactly the same request. Unset options are forwarded as ``None``.
        """
        return {
            "numResults": num_results,
            "query": query,
            "includeDomains": include_domains,
            "excludeDomains": exclude_domains,
            "startCrawlDate": start_crawl_date,
            "endCrawlDate": end_crawl_date,
            "startPublishedDate": start_published_date,
            "endPublishedDate": end_published_date,
            "useAutoprompt": use_autoprompt,
        }

    def _metaphor_search_results(
        self,
        query: str,
        num_results: int,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: Optional[bool] = None,
    ) -> List[dict]:
        """POST the search request synchronously and return the raw results.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.post(  # type: ignore
            f"{METAPHOR_API_URL}/search",
            headers=self._request_headers(),
            json=self._build_search_payload(
                query,
                num_results,
                include_domains=include_domains,
                exclude_domains=exclude_domains,
                start_crawl_date=start_crawl_date,
                end_crawl_date=end_crawl_date,
                start_published_date=start_published_date,
                end_published_date=end_published_date,
                use_autoprompt=use_autoprompt,
            ),
        )
        response.raise_for_status()
        return response.json()["results"]

    def results(
        self,
        query: str,
        num_results: int,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: Optional[bool] = None,
    ) -> List[Dict]:
        """Run a query through Metaphor Search and return result metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.
            include_domains: Domains to include in the search. Only one of
                include_domains and exclude_domains should be defined.
            exclude_domains: Domains to exclude from the search. Only one of
                include_domains and exclude_domains should be defined.
            start_crawl_date: If given, only return pages crawled after
                this date.
            end_crawl_date: If given, only return pages crawled before
                this date.
            start_published_date: If given, only return pages published
                after this date.
            end_published_date: If given, only return pages published
                before this date.
            use_autoprompt: If true, the query is converted into one better
                suited to Metaphor. Adds latency.

        Returns:
            A list of dictionaries with the following keys:
                title - The title of the page.
                url - The URL of the page.
                author - The author of the content, if applicable.
                published_date - Estimated publication date (YYYY-MM-DD).
            A key missing from the API response is filled with an
            "Unknown ..." placeholder string.
        """
        raw_search_results = self._metaphor_search_results(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_crawl_date=start_crawl_date,
            end_crawl_date=end_crawl_date,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
        )
        return self._clean_results(raw_search_results)

    async def results_async(
        self,
        query: str,
        num_results: int,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: Optional[bool] = None,
    ) -> List[Dict]:
        """Get results from the Metaphor Search API asynchronously.

        Takes the same arguments and returns the same cleaned structure as
        ``results``.

        Raises:
            Exception: If the API answers with any status other than 200.
        """
        payload = self._build_search_payload(
            query,
            num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_crawl_date=start_crawl_date,
            end_crawl_date=end_crawl_date,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
        )
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{METAPHOR_API_URL}/search",
                json=payload,
                headers=self._request_headers(),
            ) as res:
                if res.status != 200:
                    raise Exception(f"Error {res.status}: {res.reason}")
                # Read the body as text and decode it ourselves so parsing
                # does not depend on the response's Content-Type header.
                body = await res.text()
        return self._clean_results(json.loads(body)["results"])

    def _clean_results(self, raw_search_results: List[Dict]) -> List[Dict]:
        """Reduce raw API results to title, url, author and published_date.

        Keys absent from a raw result are replaced by an "Unknown ..."
        placeholder string.
        """
        return [
            {
                "title": result.get("title", "Unknown Title"),
                "url": result.get("url", "Unknown URL"),
                "author": result.get("author", "Unknown Author"),
                "published_date": result.get("publishedDate", "Unknown Date"),
            }
            for result in raw_search_results
        ]