Source code for langchain_community.embeddings.johnsnowlabs
import os
import sys
from typing import Any, List
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra
[docs]class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
"""JohnSnowLabs嵌入模型
要使用,您应该已安装``johnsnowlabs`` python包。
示例:
.. code-block:: python
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
output = embedding.embed_query("foo bar")""" # noqa: E501
model: Any = "embed_sentence.bert"
def __init__(
self,
model: Any = "embed_sentence.bert",
hardware_target: str = "cpu",
**kwargs: Any,
):
"""初始化johnsnowlabs模型。"""
super().__init__(**kwargs)
# 1) Check imports
try:
from johnsnowlabs import nlp
from nlu.pipe.pipeline import NLUPipeline
except ImportError as exc:
raise ImportError(
"Could not import johnsnowlabs python package. "
"Please install it with `pip install johnsnowlabs`."
) from exc
# 2) Start a Spark Session
try:
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
nlp.start(hardware_target=hardware_target)
except Exception as exc:
raise Exception("Failure starting Spark Session") from exc
# 3) Load the model
try:
if isinstance(model, str):
self.model = nlp.load(model)
elif isinstance(model, NLUPipeline):
self.model = model
else:
self.model = nlp.to_nlu_pipe(model)
except Exception as exc:
raise Exception("Failure loading model") from exc
class Config:
"""此pydantic对象的配置。"""
extra = Extra.forbid
[docs] def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""使用JohnSnowLabs转换器模型计算文档嵌入。
参数:
texts:要嵌入的文本列表。
返回:
每个文本的嵌入列表。
"""
df = self.model.predict(texts, output_level="document")
emb_col = None
for c in df.columns:
if "embedding" in c:
emb_col = c
return [vec.tolist() for vec in df[emb_col].tolist()]
[docs] def embed_query(self, text: str) -> List[float]:
"""使用JohnSnowLabs转换器模型计算查询嵌入。
参数:
text:要嵌入的文本。
返回:
文本的嵌入。
"""
return self.embed_documents([text])[0]