import os
import sys
from typing import Any, List

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra

class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
    """JohnSnowLabs embedding models.

    To use, you should have the ``johnsnowlabs`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
            embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
            output = embedding.embed_query("foo bar")
    """  # noqa: E501

    # Either a model reference string (before ``__init__`` resolves it) or the
    # loaded pipeline object whose ``predict`` method produces the embeddings.
    model: Any = "embed_sentence.bert"

    def __init__(
        self,
        model: Any = "embed_sentence.bert",
        hardware_target: str = "cpu",
        **kwargs: Any,
    ):
        """Initialize the johnsnowlabs model.

        Args:
            model: A model reference string passed to ``nlp.load``, an
                already-built ``NLUPipeline``, or any other object that
                ``nlp.to_nlu_pipe`` can convert into a pipeline.
            hardware_target: Forwarded unchanged to ``nlp.start``.
            **kwargs: Extra keyword arguments forwarded to the pydantic
                base-class initializer.

        Raises:
            ImportError: If the ``johnsnowlabs`` package cannot be imported.
            Exception: If starting the Spark session or loading the model
                fails; the underlying error is chained as ``__cause__``.
        """
        super().__init__(**kwargs)

        # 1) Check imports. Imported lazily so that the module itself can be
        #    imported without the optional dependency installed.
        try:
            from johnsnowlabs import nlp
            from nlu.pipe.pipeline import NLUPipeline
        except ImportError as exc:
            raise ImportError(
                "Could not import johnsnowlabs python package. "
                "Please install it with `pip install johnsnowlabs`."
            ) from exc

        # 2) Start a Spark Session. Both PySpark interpreter variables are
        #    pointed at the interpreter running this code before starting.
        try:
            os.environ["PYSPARK_PYTHON"] = sys.executable
            os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
            nlp.start(hardware_target=hardware_target)
        except Exception as exc:
            raise Exception("Failure starting Spark Session") from exc

        # 3) Load the model, normalising every accepted input form into the
        #    object stored on ``self.model``.
        try:
            if isinstance(model, str):
                self.model = nlp.load(model)
            elif isinstance(model, NLUPipeline):
                self.model = model
            else:
                self.model = nlp.to_nlu_pipe(model)
        except Exception as exc:
            raise Exception("Failure loading model") from exc

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute document embeddings using a JohnSnowLabs transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            A list of embeddings, one for each text. An empty ``texts`` list
            yields an empty result without invoking the model.

        Raises:
            ValueError: If the prediction output contains no column whose
                name includes ``"embedding"``.
        """
        if not texts:
            # Nothing to embed; skip the pipeline call entirely.
            return []

        df = self.model.predict(texts, output_level="document")

        embedding_columns = [c for c in df.columns if "embedding" in c]
        if not embedding_columns:
            raise ValueError(
                "Model output does not contain an embedding column; "
                f"available columns: {list(df.columns)}"
            )
        # When several columns match, the last one is used.
        emb_col = embedding_columns[-1]

        # NOTE(review): assumes each cell exposes ``.tolist()`` (e.g. a numpy
        # array) - confirm against the pipeline's output format.
        return [vec.tolist() for vec in df[emb_col].tolist()]

    def embed_query(self, text: str) -> List[float]:
        """Compute a query embedding using a JohnSnowLabs transformer model.

        Args:
            text: The text to embed.

        Returns:
            The embedding for the text.
        """
        return self.embed_documents([text])[0]