# Source code for langchain_community.embeddings.aleph_alpha

from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain_core.utils import get_from_dict_or_env


class AlephAlphaAsymmetricSemanticEmbedding(BaseModel, Embeddings):
    """Aleph Alpha's asymmetric semantic embedding.

    AA provides you with an endpoint to embed a document and a query.
    The models were optimized to make the embeddings of documents and
    of the query for a document as similar as possible.
    To learn more, check out:
    https://docs.aleph-alpha.com/docs/tasks/semantic_embed/

    Example:
        .. code-block:: python

            from langchain_community.embeddings.aleph_alpha import (
                AlephAlphaAsymmetricSemanticEmbedding,
            )

            embeddings = AlephAlphaAsymmetricSemanticEmbedding(
                normalize=True, compress_to_size=128
            )

            document = "This is a content of the document"
            query = "What is the content of the document?"

            doc_result = embeddings.embed_documents([document])
            query_result = embeddings.embed_query(query)
    """

    client: Any  #: :meta private:

    # Embedding params
    model: str = "luminous-base"
    """Model name to use."""
    compress_to_size: Optional[int] = None
    """Whether the returned embedding should be the original 5120-dim vector
    or be compressed to 128 dimensions."""
    normalize: bool = False
    """Whether the returned embeddings should be normalized."""
    # NOTE(review): the client library may type this threshold as a float;
    # an int field would truncate fractional values -- confirm before changing.
    contextual_control_threshold: Optional[int] = None
    """Attention control parameters only apply to those tokens that have
    explicitly been set in the request."""
    control_log_additive: bool = True
    """Apply controls on prompt items by adding log(control_factor) to the
    attention scores."""

    # Client params
    aleph_alpha_api_key: Optional[str] = None
    """API key for the Aleph Alpha API."""
    host: str = "https://api.aleph-alpha.com"
    """The hostname of the API host.
    The default one is "https://api.aleph-alpha.com"."""
    hosting: Optional[str] = None
    """Determines in which datacenters the request may be processed.
    You can either set the parameter to "aleph-alpha" or omit it (defaulting
    to None).
    Not setting this value, or setting it to None, gives maximal flexibility
    in processing your request in Aleph Alpha's own datacenters and on servers
    hosted with other providers. Choose this option for maximal availability.
    Setting it to "aleph-alpha" allows the request to be processed only in
    Aleph Alpha's own datacenters. Choose this option for maximal data
    privacy."""
    request_timeout_seconds: int = 305
    """Client timeout that will be set for HTTP requests in the `requests`
    library's API calls.
    The server will close all requests after 300 seconds with an internal
    server error."""
    total_retries: int = 8
    """The number of retries made in case requests fail with certain retryable
    status codes. If the last retry fails a corresponding exception is raised.
    Note that between retries an exponential backoff is applied, starting with
    0.5 s after the first retry and doubling for each retry made. So with the
    default setting of 8 retries a total wait time of 63.5 s is added between
    the retries."""
    nice: bool = False
    """Setting this to True will signal to the API that you intend to be nice
    to other users by de-prioritizing your request below concurrent ones."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key and the Python package are available.

        The API key is looked up through ``get_from_dict_or_env`` using the
        ``aleph_alpha_api_key`` field and the ``ALEPH_ALPHA_API_KEY``
        environment variable name. A ``Client`` built from the client-related
        fields is then stored under ``values["client"]``.

        Args:
            values: The field values collected for the model.

        Returns:
            The same ``values`` mapping with ``"client"`` populated.

        Raises:
            ImportError: If ``aleph_alpha_client`` cannot be imported.
        """
        aleph_alpha_api_key = get_from_dict_or_env(
            values, "aleph_alpha_api_key", "ALEPH_ALPHA_API_KEY"
        )
        # Keep the try body limited to the import so that only a genuinely
        # missing package is reported with the installation hint.
        try:
            from aleph_alpha_client import Client
        except ImportError as err:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            ) from err

        values["client"] = Client(
            token=aleph_alpha_api_key,
            host=values["host"],
            hosting=values["hosting"],
            request_timeout_seconds=values["request_timeout_seconds"],
            total_retries=values["total_retries"],
            nice=values["nice"],
        )
        return values

    def _embed_with_representation(
        self, text: str, representation_name: str
    ) -> List[float]:
        """Embed one text using the named ``SemanticRepresentation`` member.

        Args:
            text: The text to embed.
            representation_name: Attribute name on ``SemanticRepresentation``
                to use for the request (e.g. ``"Document"`` or ``"Query"``).

        Returns:
            The ``embedding`` attribute of the client's response.

        Raises:
            ImportError: If ``aleph_alpha_client`` cannot be imported.
        """
        try:
            from aleph_alpha_client import (
                Prompt,
                SemanticEmbeddingRequest,
                SemanticRepresentation,
            )
        except ImportError as err:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            ) from err

        request = SemanticEmbeddingRequest(
            prompt=Prompt.from_text(text),
            representation=getattr(SemanticRepresentation, representation_name),
            compress_to_size=self.compress_to_size,
            normalize=self.normalize,
            contextual_control_threshold=self.contextual_control_threshold,
            control_log_additive=self.control_log_additive,
        )
        response = self.client.semantic_embed(request=request, model=self.model)
        return response.embedding

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Call out to Aleph Alpha's asymmetric Document endpoint.

        One request is issued per text, in order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return [self._embed_with_representation(text, "Document") for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Call out to Aleph Alpha's asymmetric query embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        return self._embed_with_representation(text, "Query")


class AlephAlphaSymmetricSemanticEmbedding(AlephAlphaAsymmetricSemanticEmbedding):
    """Symmetric version of Aleph Alpha's semantic embeddings.

    The main difference is that here both the documents and the queries are
    embedded with ``SemanticRepresentation.Symmetric``.

    Example:
        .. code-block:: python

            from langchain_community.embeddings.aleph_alpha import (
                AlephAlphaSymmetricSemanticEmbedding,
            )

            embeddings = AlephAlphaSymmetricSemanticEmbedding(
                normalize=True, compress_to_size=128
            )
            text = "This is a test text"

            doc_result = embeddings.embed_documents([text])
            query_result = embeddings.embed_query(text)
    """

    def _embed(self, text: str) -> List[float]:
        """Embed one text with the symmetric representation.

        Args:
            text: The text to embed.

        Returns:
            The ``embedding`` attribute of the client's response.

        Raises:
            ImportError: If ``aleph_alpha_client`` cannot be imported.
        """
        try:
            from aleph_alpha_client import (
                Prompt,
                SemanticEmbeddingRequest,
                SemanticRepresentation,
            )
        except ImportError as err:
            raise ImportError(
                "Could not import aleph_alpha_client python package. "
                "Please install it with `pip install aleph_alpha_client`."
            ) from err

        request = SemanticEmbeddingRequest(
            prompt=Prompt.from_text(text),
            representation=SemanticRepresentation.Symmetric,
            compress_to_size=self.compress_to_size,
            normalize=self.normalize,
            contextual_control_threshold=self.contextual_control_threshold,
            control_log_additive=self.control_log_additive,
        )
        response = self.client.semantic_embed(request=request, model=self.model)
        return response.embedding

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Call out to Aleph Alpha's Document endpoint.

        Each text is embedded with the symmetric representation, in order.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        return [self._embed(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Call out to Aleph Alpha's symmetric query embedding endpoint.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        return self._embed(text)