Source code for langchain_community.embeddings.elasticsearch

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain_core._api import deprecated
from langchain_core.utils import get_from_env

if TYPE_CHECKING:
    from elasticsearch import Elasticsearch
    from elasticsearch.client import MlClient

from langchain_core.embeddings import Embeddings


@deprecated(
    "0.1.11", alternative="Use class in langchain-elasticsearch package", pending=True
)
class ElasticsearchEmbeddings(Embeddings):
    """Elasticsearch embedding models.

    This class provides an interface for generating embeddings with a model
    deployed in an Elasticsearch cluster. It requires an Elasticsearch ML client
    object and the model_id of the model deployed in the cluster.

    In Elasticsearch you need to have an embedding model loaded and deployed.
    - https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-trained-model.html
    - https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-models.html
    """  # noqa: E501

    def __init__(
        self,
        client: MlClient,
        model_id: str,
        *,
        input_field: str = "text_field",
    ):
        """Initialize the ElasticsearchEmbeddings instance.

        Args:
            client (MlClient): An Elasticsearch ML client object.
            model_id (str): The model_id of the model deployed in the
                Elasticsearch cluster.
            input_field (str): The name of the key for the input text field in
                the document sent for inference. Defaults to 'text_field'.
        """
        self.client = client
        self.model_id = model_id
        self.input_field = input_field

    @classmethod
    def from_credentials(
        cls,
        model_id: str,
        *,
        es_cloud_id: Optional[str] = None,
        es_user: Optional[str] = None,
        es_password: Optional[str] = None,
        input_field: str = "text_field",
    ) -> ElasticsearchEmbeddings:
        """Instantiate embeddings from Elasticsearch credentials.

        Any credential that is not passed explicitly (or is passed as an empty
        value) is looked up through ``get_from_env`` under the environment
        variable names ES_CLOUD_ID, ES_USER and ES_PASSWORD.

        Args:
            model_id (str): The model_id of the model deployed in the
                Elasticsearch cluster.
            input_field (str): The name of the key for the input text field in
                the document. Defaults to 'text_field'.
            es_cloud_id (str, optional): The Elasticsearch cloud ID to connect
                to.
            es_user (str, optional): Elasticsearch username.
            es_password (str, optional): Elasticsearch password.

        Returns:
            ElasticsearchEmbeddings: An instance backed by a newly created
            Elasticsearch connection.

        Raises:
            ImportError: If the ``elasticsearch`` package is not installed.

        Example:
            .. code-block:: python

                from langchain_community.embeddings import ElasticsearchEmbeddings

                # Define the model ID and input field name (if different from
                # the default)
                model_id = "your_model_id"
                # Optional, only needed if different from 'text_field'
                input_field = "your_input_field"

                # Credentials can be passed in two ways. Either set the env
                # vars ES_CLOUD_ID, ES_USER, ES_PASSWORD and they will be
                # picked up automatically, or pass them in directly as kwargs.
                embeddings = ElasticsearchEmbeddings.from_credentials(
                    model_id,
                    input_field=input_field,
                    # es_cloud_id="foo",
                    # es_user="bar",
                    # es_password="baz",
                )

                documents = [
                    "This is an example document.",
                    "Another example document to generate embeddings for.",
                ]
                embeddings.embed_documents(documents)
        """
        try:
            from elasticsearch import Elasticsearch
            from elasticsearch.client import MlClient
        except ImportError as err:
            # Chain the original failure so the real cause stays visible.
            raise ImportError(
                "elasticsearch package not found, please install with 'pip install "
                "elasticsearch'"
            ) from err

        # Explicit arguments win; otherwise fall back to the environment.
        es_cloud_id = es_cloud_id or get_from_env("es_cloud_id", "ES_CLOUD_ID")
        es_user = es_user or get_from_env("es_user", "ES_USER")
        es_password = es_password or get_from_env("es_password", "ES_PASSWORD")

        # Connect to Elasticsearch
        es_connection = Elasticsearch(
            cloud_id=es_cloud_id, basic_auth=(es_user, es_password)
        )
        client = MlClient(es_connection)
        return cls(client, model_id, input_field=input_field)

    @classmethod
    def from_es_connection(
        cls,
        model_id: str,
        es_connection: Elasticsearch,
        input_field: str = "text_field",
    ) -> ElasticsearchEmbeddings:
        """Instantiate embeddings from an existing Elasticsearch connection.

        The connection object is wrapped in an MlClient, which is then used to
        initialize the ElasticsearchEmbeddings instance.

        Args:
            model_id (str): The model_id of the model deployed in the
                Elasticsearch cluster.
            es_connection (elasticsearch.Elasticsearch): An existing
                Elasticsearch connection object.
            input_field (str, optional): The name of the key for the input text
                field in the document. Defaults to 'text_field'.

        Returns:
            ElasticsearchEmbeddings: An instance of the ElasticsearchEmbeddings
            class.

        Example:
            .. code-block:: python

                from elasticsearch import Elasticsearch

                from langchain_community.embeddings import ElasticsearchEmbeddings

                # Define the model ID and input field name (if different from
                # the default)
                model_id = "your_model_id"
                # Optional, only needed if different from 'text_field'
                input_field = "your_input_field"

                # Create the Elasticsearch connection
                es_connection = Elasticsearch(
                    hosts=["http://localhost:9200"],
                    basic_auth=("user", "password"),
                )

                # Instantiate ElasticsearchEmbeddings using the existing
                # connection
                embeddings = ElasticsearchEmbeddings.from_es_connection(
                    model_id,
                    es_connection,
                    input_field=input_field,
                )

                documents = [
                    "This is an example document.",
                    "Another example document to generate embeddings for.",
                ]
                embeddings.embed_documents(documents)
        """
        # Imported here rather than at module level so the elasticsearch
        # package is only required when this constructor is actually used.
        from elasticsearch.client import MlClient

        # Create an MlClient from the given Elasticsearch connection
        client = MlClient(es_connection)

        # Return a new instance of the ElasticsearchEmbeddings class with
        # the MlClient, model_id, and input_field
        return cls(client, model_id, input_field=input_field)

    def _embedding_func(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for the given texts using the Elasticsearch model.

        Each text is wrapped in a one-key document ``{input_field: text}`` and
        all documents are sent in a single ``infer_trained_model`` call.

        Args:
            texts (List[str]): A list of text strings to generate embeddings
                for.

        Returns:
            List[List[float]]: A list of embeddings, one for each text in the
            input list. An empty input yields an empty list.
        """
        # Nothing to embed: skip the inference call entirely.
        if not texts:
            return []

        response = self.client.infer_trained_model(
            model_id=self.model_id, docs=[{self.input_field: text} for text in texts]
        )

        # One entry per submitted document; the vector is under "predicted_value".
        embeddings = [doc["predicted_value"] for doc in response["inference_results"]]
        return embeddings

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of documents.

        Args:
            texts (List[str]): A list of document text strings to generate
                embeddings for.

        Returns:
            List[List[float]]: A list of embeddings, one for each document in
            the input list.
        """
        return self._embedding_func(texts)

    def embed_query(self, text: str) -> List[float]:
        """Generate an embedding for a single query text.

        Args:
            text (str): The query text to generate an embedding for.

        Returns:
            List[float]: The embedding for the input query text.
        """
        return self._embedding_func([text])[0]