Source code for langchain_community.embeddings.modelscope_hub

from typing import Any, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra


class ModelScopeEmbeddings(BaseModel, Embeddings):
    """ModelScope Hub embedding models.

    To use, you should have the ``modelscope`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import ModelScopeEmbeddings
            model_id = "damo/nlp_corom_sentence-embedding_english-base"
            embed = ModelScopeEmbeddings(model_id=model_id, model_revision="v1.0.0")
    """

    # Sentence-embedding pipeline callable; assigned in ``__init__``.
    embed: Any
    model_id: str = "damo/nlp_corom_sentence-embedding_english-base"
    """Model name to use."""
    # Forwarded to the pipeline factory as ``model_revision``.
    model_revision: Optional[str] = None

    def __init__(self, **kwargs: Any):
        """Validate the fields and build the sentence-embedding pipeline.

        Args:
            **kwargs: Field values (e.g. ``model_id``, ``model_revision``)
                passed through to the pydantic constructor.

        Raises:
            ImportError: If the ``modelscope`` package cannot be imported.
        """
        super().__init__(**kwargs)
        # Imported here rather than at module level so that ``modelscope`` is
        # only needed when the class is actually instantiated.
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ImportError(
                "Could not import some python packages. "
                "Please install it with `pip install modelscope`."
            ) from e
        self.embed = pipeline(
            Tasks.sentence_embedding,
            model=self.model_id,
            model_revision=self.model_revision,
        )

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute document embeddings using a ModelScope embedding model.

        Newlines in each text are replaced with spaces before embedding.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text. An empty input yields an
            empty list without invoking the pipeline.
        """
        cleaned = [text.replace("\n", " ") for text in texts]
        if not cleaned:
            # Nothing to embed; avoid calling the pipeline with an empty batch.
            return []
        inputs = {"source_sentence": cleaned}
        embeddings = self.embed(input=inputs)["text_embedding"]
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute a query embedding using a ModelScope embedding model.

        Newlines in the text are replaced with spaces before embedding.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.
        """
        text = text.replace("\n", " ")
        # The pipeline takes a batch, so wrap the single query in a list and
        # take the first (only) row of the result.
        inputs = {"source_sentence": [text]}
        embedding = self.embed(input=inputs)["text_embedding"][0]
        return embedding.tolist()