"""调用Tavily搜索API的工具。
要设置这个工具,请按照以下说明进行操作:
"""
import json
from typing import Dict, List, Optional
import aiohttp
import requests
from langchain_core.pydantic_v1 import BaseModel, Extra, SecretStr, root_validator
from langchain_core.utils import get_from_dict_or_env
TAVILY_API_URL = "https://api.tavily.com"
[docs]class TavilySearchAPIWrapper(BaseModel):
"""Tavily搜索API的封装器。"""
tavily_api_key: SecretStr
class Config:
"""此pydantic对象的配置。"""
extra = Extra.forbid
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""确保环境中存在API密钥和端点。"""
tavily_api_key = get_from_dict_or_env(
values, "tavily_api_key", "TAVILY_API_KEY"
)
values["tavily_api_key"] = tavily_api_key
return values
[docs] def raw_results(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> Dict:
params = {
"api_key": self.tavily_api_key.get_secret_value(),
"query": query,
"max_results": max_results,
"search_depth": search_depth,
"include_domains": include_domains,
"exclude_domains": exclude_domains,
"include_answer": include_answer,
"include_raw_content": include_raw_content,
"include_images": include_images,
}
response = requests.post(
# type: ignore
f"{TAVILY_API_URL}/search",
json=params,
)
response.raise_for_status()
return response.json()
[docs] def results(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> List[Dict]:
"""通过Tavily Search运行查询并返回元数据。
参数:
query: 要搜索的查询。
max_results: 要返回的最大结果数。
search_depth: 搜索的深度。可以是"basic"或"advanced"。
include_domains: 要包含在搜索中的域名列表。
exclude_domains: 要从搜索中排除的域名列表。
include_answer: 是否在结果中包含答案。
include_raw_content: 是否在结果中包含原始内容。
include_images: 是否在结果中包含图像。
返回:
query: 搜索的查询。
follow_up_questions: 后续问题列表。
response_time: 查询的响应时间。
answer: 查询的答案。
images: 图像列表。
results: 包含结果的字典列表:
title: 结果的标题。
url: 结果的URL。
content: 结果的内容。
score: 结果的分数。
raw_content: 结果的原始内容。
""" # noqa: E501
raw_search_results = self.raw_results(
query,
max_results=max_results,
search_depth=search_depth,
include_domains=include_domains,
exclude_domains=exclude_domains,
include_answer=include_answer,
include_raw_content=include_raw_content,
include_images=include_images,
)
return self.clean_results(raw_search_results["results"])
[docs] async def raw_results_async(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> Dict:
"""从Tavily搜索API异步获取结果。"""
# Function to perform the API call
async def fetch() -> str:
params = {
"api_key": self.tavily_api_key.get_secret_value(),
"query": query,
"max_results": max_results,
"search_depth": search_depth,
"include_domains": include_domains,
"exclude_domains": exclude_domains,
"include_answer": include_answer,
"include_raw_content": include_raw_content,
"include_images": include_images,
}
async with aiohttp.ClientSession() as session:
async with session.post(f"{TAVILY_API_URL}/search", json=params) as res:
if res.status == 200:
data = await res.text()
return data
else:
raise Exception(f"Error {res.status}: {res.reason}")
results_json_str = await fetch()
return json.loads(results_json_str)
[docs] async def results_async(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> List[Dict]:
results_json = await self.raw_results_async(
query=query,
max_results=max_results,
search_depth=search_depth,
include_domains=include_domains,
exclude_domains=exclude_domains,
include_answer=include_answer,
include_raw_content=include_raw_content,
include_images=include_images,
)
return self.clean_results(results_json["results"])
[docs] def clean_results(self, results: List[Dict]) -> List[Dict]:
"""清理Tavily搜索API的结果。"""
clean_results = []
for result in results:
clean_results.append(
{
"url": result["url"],
"content": result["content"],
}
)
return clean_results