Source code for langchain_community.utilities.tavily_search

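"""Utility that calls the Tavily Search API.

To set this utility up, supply a Tavily API key either as the
``tavily_api_key`` argument or through the ``TAVILY_API_KEY`` environment
variable.
"""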
import json
from typing import Dict, List, Optional

import aiohttp
import requests
from langchain_core.pydantic_v1 import BaseModel, Extra, SecretStr, root_validator
from langchain_core.utils import get_from_dict_or_env

TAVILY_API_URL = "https://api.tavily.com"


class TavilySearchAPIWrapper(BaseModel):
    """Wrapper around the Tavily Search API.

    The API key is read from the ``tavily_api_key`` field or, through
    ``get_from_dict_or_env``, from the ``TAVILY_API_KEY`` environment
    variable. Every request is POSTed as JSON to ``{TAVILY_API_URL}/search``.
    """

    tavily_api_key: SecretStr

    class Config:
        """Configuration for this pydantic object."""

        # Reject any constructor argument that is not a declared field.
        extra = Extra.forbid

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the API key before field validation runs.

        Args:
            values: Raw constructor values.

        Returns:
            The same mapping with ``"tavily_api_key"`` set to whatever
            ``get_from_dict_or_env`` resolved.
        """
        tavily_api_key = get_from_dict_or_env(
            values, "tavily_api_key", "TAVILY_API_KEY"
        )
        values["tavily_api_key"] = tavily_api_key
        return values

    def _build_params(
        self,
        query: str,
        max_results: Optional[int],
        search_depth: Optional[str],
        include_domains: Optional[List[str]],
        exclude_domains: Optional[List[str]],
        include_answer: Optional[bool],
        include_raw_content: Optional[bool],
        include_images: Optional[bool],
    ) -> Dict:
        """Build the JSON payload shared by the sync and async search calls."""
        return {
            "api_key": self.tavily_api_key.get_secret_value(),
            "query": query,
            "max_results": max_results,
            "search_depth": search_depth,
            # ``None`` means "no domain filter"; always send a list so the
            # payload matches what the former ``[]`` defaults produced.
            "include_domains": [] if include_domains is None else include_domains,
            "exclude_domains": [] if exclude_domains is None else exclude_domains,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
            "include_images": include_images,
        }

    def raw_results(
        self,
        query: str,
        max_results: Optional[int] = 5,
        search_depth: Optional[str] = "advanced",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: Optional[bool] = False,
        include_raw_content: Optional[bool] = False,
        include_images: Optional[bool] = False,
    ) -> Dict:
        """Run a query through Tavily Search and return the decoded response.

        Args:
            query: The query to search for.
            max_results: The maximum number of results to return.
            search_depth: The depth of the search, "basic" or "advanced".
            include_domains: Domains to include in the search; ``None``
                is sent as an empty list.
            exclude_domains: Domains to exclude from the search; ``None``
                is sent as an empty list.
            include_answer: Whether to include the answer in the results.
            include_raw_content: Whether to include the raw content.
            include_images: Whether to include images.

        Returns:
            The JSON body of the response, decoded. Other methods of this
            class read its ``"results"`` list, whose items carry ``"url"``
            and ``"content"``.

        Raises:
            requests.HTTPError: If the response status is 4xx or 5xx
                (raised by ``response.raise_for_status()``).
        """
        params = self._build_params(
            query,
            max_results,
            search_depth,
            include_domains,
            exclude_domains,
            include_answer,
            include_raw_content,
            include_images,
        )
        response = requests.post(  # type: ignore
            f"{TAVILY_API_URL}/search",
            json=params,
        )
        response.raise_for_status()
        return response.json()

    def results(
        self,
        query: str,
        max_results: Optional[int] = 5,
        search_depth: Optional[str] = "advanced",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: Optional[bool] = False,
        include_raw_content: Optional[bool] = False,
        include_images: Optional[bool] = False,
    ) -> List[Dict]:
        """Run a query through Tavily Search and return the cleaned results.

        Args:
            query: The query to search for.
            max_results: The maximum number of results to return.
            search_depth: The depth of the search, "basic" or "advanced".
            include_domains: Domains to include in the search.
            exclude_domains: Domains to exclude from the search.
            include_answer: Whether to include the answer in the results.
            include_raw_content: Whether to include the raw content.
            include_images: Whether to include images.

        Returns:
            One dict per entry of the response's ``"results"`` list, each
            reduced by ``clean_results`` to the keys ``"url"`` and
            ``"content"``. Use ``raw_results`` for the full response.

        Raises:
            requests.HTTPError: If the HTTP request fails.
            KeyError: If the response has no ``"results"`` key.
        """
        raw_search_results = self.raw_results(
            query,
            max_results=max_results,
            search_depth=search_depth,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            include_answer=include_answer,
            include_raw_content=include_raw_content,
            include_images=include_images,
        )
        return self.clean_results(raw_search_results["results"])

    async def raw_results_async(
        self,
        query: str,
        max_results: Optional[int] = 5,
        search_depth: Optional[str] = "advanced",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: Optional[bool] = False,
        include_raw_content: Optional[bool] = False,
        include_images: Optional[bool] = False,
    ) -> Dict:
        """Get results from the Tavily Search API asynchronously.

        Takes the same arguments as ``raw_results`` and sends the same
        payload, using a fresh ``aiohttp.ClientSession`` for the call.

        Returns:
            The response body parsed with ``json.loads``.

        Raises:
            Exception: If the response status is anything other than 200;
                the message is ``"Error <status>: <reason>"``.
        """
        params = self._build_params(
            query,
            max_results,
            search_depth,
            include_domains,
            exclude_domains,
            include_answer,
            include_raw_content,
            include_images,
        )
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{TAVILY_API_URL}/search", json=params) as res:
                if res.status != 200:
                    # Exception type and message are kept as-is for callers
                    # that already handle this failure.
                    raise Exception(f"Error {res.status}: {res.reason}")
                results_json_str = await res.text()
        return json.loads(results_json_str)

    async def results_async(
        self,
        query: str,
        max_results: Optional[int] = 5,
        search_depth: Optional[str] = "advanced",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        include_answer: Optional[bool] = False,
        include_raw_content: Optional[bool] = False,
        include_images: Optional[bool] = False,
    ) -> List[Dict]:
        """Asynchronous counterpart of ``results``.

        Awaits ``raw_results_async`` with the given arguments and returns
        the response's ``"results"`` list reduced by ``clean_results`` to
        ``"url"`` and ``"content"``.

        Raises:
            Exception: If the HTTP status is not 200.
            KeyError: If the response has no ``"results"`` key.
        """
        results_json = await self.raw_results_async(
            query=query,
            max_results=max_results,
            search_depth=search_depth,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            include_answer=include_answer,
            include_raw_content=include_raw_content,
            include_images=include_images,
        )
        return self.clean_results(results_json["results"])

    def clean_results(self, results: List[Dict]) -> List[Dict]:
        """Clean results from the Tavily Search API.

        Args:
            results: Result entries; each must have ``"url"`` and
                ``"content"`` keys.

        Returns:
            A new list with one ``{"url": ..., "content": ...}`` dict per
            input entry, in the same order.

        Raises:
            KeyError: If an entry lacks ``"url"`` or ``"content"``.
        """
        return [
            {"url": result["url"], "content": result["content"]}
            for result in results
        ]