Exa

ExaToolSpec #

基类: BaseToolSpec

Exa工具规范。

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

class ExaToolSpec(BaseToolSpec):
    """Exa tool spec."""

    spec_functions = [
        "search",
        "retrieve_documents",
        "search_and_retrieve_documents",
        "search_and_retrieve_highlights",
        "find_similar",
        "current_date",
    ]

    def __init__(
        self,
        api_key: str,
        verbose: bool = True,
        max_characters: int = 2000,
    ) -> None:
        """Initialize with parameters."""
        from exa_py import Exa

        self.client = Exa(api_key=api_key, user_agent="llama-index")
        self._verbose = verbose
        # max characters for the text field in the search_and_contents function
        self._max_characters = max_characters

    def search(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List:
        """
        Exa allows you to use a natural language query to search the internet.

        Args:
            query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
            num_results (Optional[int]): Number of results to return. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def retrieve_documents(self, ids: List[str]) -> List[Document]:
        """
        Retrieve a list of document texts returned by `exa_search`, using the ID field.

        Args:
            ids (List(str)): the ids of the documents to retrieve

        """
        response = self.client.get_contents(ids)
        return [Document(text=result.text) for result in response.results]

    def find_similar(
        self,
        url: str,
        num_results: Optional[int] = 3,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """
        Retrieve a list of similar documents to a given url.

        Args:
            url (str): The web page to find similar results of
            num_results (Optional[int]): Number of results to return. Default 3.
            start_published_date (Optional[str]): A date string like "2020-06-15"
            end_published_date (Optional[str]): End date string

        """
        response = self.client.find_similar(
            url,
            num_results=num_results,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
        )
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def search_and_retrieve_documents(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List[Document]:
        """
        Combines the functionality of `search` and `retrieve_documents`.

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            text={"max_characters": self._max_characters},
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.text) for document in response.results]

    def search_and_retrieve_highlights(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List[Document]:
        """
        Searches and retrieves highlights (intelligent snippets from the document).

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            highlights=True,
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.highlights[0]) for document in response.results]

    def current_date(self):
        """
        A function to return todays date.

        Call this before any other functions that take timestamps as an argument
        """
        return datetime.date.today()

搜索 #

search(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List

Exa 允许您使用自然语言查询来搜索互联网。

参数:

名称	类型	描述	默认值
`query`	`str`	一个自然语言查询，表述为链接提供内容的答案，例如："这是关于太空的最新消息："	required
`num_results`	`Optional[int]`	返回的结果数量。默认为10。	`10`
`include_domains`	`Optional[List(str)]`	一个顶级域名列表，如["wsj.com"]，用于将搜索限制在特定网站。	`None`
`exclude_domains`	`Optional[List(str)]`	要排除的顶级域名。	`None`
`start_published_date`	`Optional[str]`	一个日期字符串，例如"2020-06-15"。从`current_date`获取日期	`None`
`end_published_date`	`Optional[str]`	结束日期字符串	`None`

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def search(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List:
    """
    Exa allows you to use a natural language query to search the internet.

    Args:
        query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
        num_results (Optional[int]): Number of results to return. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

retrieve_documents #

retrieve_documents(ids: List[str]) -> List[Document]

通过ID字段检索由exa_search返回的文档文本列表。

参数:

名称	类型	描述	默认值
`ids`	`List(str`	要检索的文档ID	required

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def retrieve_documents(self, ids: List[str]) -> List[Document]:
    """
    Retrieve a list of document texts returned by `exa_search`, using the ID field.

    Args:
        ids (List(str)): the ids of the documents to retrieve

    """
    response = self.client.get_contents(ids)
    return [Document(text=result.text) for result in response.results]

find_similar #

find_similar(url: str, num_results: Optional[int] = 3, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List

检索与给定URL相似的文档列表。

参数:

名称	类型	描述	默认值
`url`	`str`	用于查找相似结果的网页	required
`num_results`	`Optional[int]`	返回的结果数量。默认为3。	`3`
`start_published_date`	`Optional[str]`	类似"2020-06-15"的日期字符串	`None`
`end_published_date`	`Optional[str]`	结束日期字符串	`None`

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def find_similar(
    self,
    url: str,
    num_results: Optional[int] = 3,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List:
    """
    Retrieve a list of similar documents to a given url.

    Args:
        url (str): The web page to find similar results of
        num_results (Optional[int]): Number of results to return. Default 3.
        start_published_date (Optional[str]): A date string like "2020-06-15"
        end_published_date (Optional[str]): End date string

    """
    response = self.client.find_similar(
        url,
        num_results=num_results,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
    )
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

搜索并检索文档 #

search_and_retrieve_documents(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List[Document]

结合了search和retrieve_documents的功能。

参数:

名称	类型	描述	默认值
`query`	`str`	自然语言查询	required
`num_results`	`Optional[int]`	返回结果的数量。默认为10。	`10`
`include_domains`	`Optional[List(str)]`	要搜索的顶级域名列表，例如["wsj.com"]	`None`
`exclude_domains`	`Optional[List(str)]`	要排除的顶级域名。	`None`
`start_published_date`	`Optional[str]`	类似"2020-06-15"的日期字符串。	`None`
`end_published_date`	`Optional[str]`	结束日期字符串	`None`

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def search_and_retrieve_documents(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List[Document]:
    """
    Combines the functionality of `search` and `retrieve_documents`.

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        text={"max_characters": self._max_characters},
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.text) for document in response.results]

搜索并获取高亮内容 #

search_and_retrieve_highlights(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List[Document]

搜索并检索高亮内容（文档中的智能片段）。

参数:

名称	类型	描述	默认值
`query`	`str`	自然语言查询	required
`num_results`	`Optional[int]`	返回结果的数量。默认为10。	`10`
`include_domains`	`Optional[List(str)]`	要搜索的顶级域名列表，例如["wsj.com"]	`None`
`exclude_domains`	`Optional[List(str)]`	要排除的顶级域名。	`None`
`start_published_date`	`Optional[str]`	类似"2020-06-15"的日期字符串。	`None`
`end_published_date`	`Optional[str]`	结束日期字符串	`None`

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def search_and_retrieve_highlights(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List[Document]:
    """
    Searches and retrieves highlights (intelligent snippets from the document).

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        highlights=True,
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.highlights[0]) for document in response.results]

current_date #

current_date()

一个返回今天日期的函数。

在调用任何其他以时间戳作为参数的函数之前调用此函数

Source code in llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py

def current_date(self):
    """
    A function to return todays date.

    Call this before any other functions that take timestamps as an argument
    """
    return datetime.date.today()