# Source code for langchain_community.embeddings.volcengine

from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain_core.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class VolcanoEmbeddings(BaseModel, Embeddings):
    """`Volcengine Embeddings` embedding models.

    Thin wrapper around the Volcengine MaaS client: the client is built once
    during validation from ``host``/``region`` and the access/secret key pair,
    and ``embed_documents`` sends the input texts to it in batches of
    ``chunk_size``.
    """

    volcano_ak: Optional[str] = None
    """Volcengine access key (AK).

    Looked up under the ``VOLC_ACCESSKEY`` environment variable when not
    passed explicitly.
    Learn more: https://www.volcengine.com/docs/6459/76491#ak-sk"""

    volcano_sk: Optional[str] = None
    """Volcengine secret key (SK).

    Looked up under the ``VOLC_SECRETKEY`` environment variable when not
    passed explicitly.
    Learn more: https://www.volcengine.com/docs/6459/76491#ak-sk"""

    host: str = "maas-api.ml-platform-cn-beijing.volces.com"
    """Host passed to the MaaS client.

    Learn more: https://www.volcengine.com/docs/82379/1174746"""

    region: str = "cn-beijing"
    """Region passed to the MaaS client.

    Learn more: https://www.volcengine.com/docs/82379/1174746"""

    model: str = "bge-large-zh"
    """Model name sent with every request.

    Available names are listed at
    https://www.volcengine.com/docs/82379/1174746"""

    version: str = "1.0"
    """Model version sent with every request."""

    chunk_size: int = 100
    """Maximum number of texts sent to the service in a single request."""

    client: Any
    """Volcengine MaaS client; populated by ``validate_environment``."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the credentials and build the Volcengine MaaS client.

        ``volcano_ak`` and ``volcano_sk`` are resolved with
        ``get_from_dict_or_env`` (explicit value first, then the
        ``VOLC_ACCESSKEY`` / ``VOLC_SECRETKEY`` environment variables), after
        which a ``MaasService`` is created from ``host`` and ``region`` and
        configured with that key pair.

        Args:
            values: Field values of the model being validated.

        Returns:
            The same dict, with ``volcano_ak``, ``volcano_sk`` and ``client``
            filled in.

        Raises:
            ImportError: If the ``volcengine`` package is not installed.
        """
        # NOTE(review): get_from_dict_or_env is expected to raise when a key is
        # found neither in `values` nor in the environment - confirm against
        # langchain_core.utils.
        values["volcano_ak"] = get_from_dict_or_env(
            values,
            "volcano_ak",
            "VOLC_ACCESSKEY",
        )
        values["volcano_sk"] = get_from_dict_or_env(
            values,
            "volcano_sk",
            "VOLC_SECRETKEY",
        )

        # Only the import is guarded, so an ImportError raised while building
        # or configuring the client is not misreported as a missing package.
        try:
            from volcengine.maas import MaasService
        except ImportError as e:
            raise ImportError(
                "volcengine package not found, please install it with "
                "`pip install volcengine`"
            ) from e

        client = MaasService(values["host"], values["region"])
        client.set_ak(values["volcano_ak"])
        client.set_sk(values["volcano_sk"])
        values["client"] = client
        return values

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query text.

        Args:
            text: The text to embed.

        Returns:
            The embedding of ``text``, obtained by delegating to
            ``embed_documents`` with a one-element list.
        """
        return self.embed_documents([text])[0]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts, sending them in batches of ``chunk_size``.

        Args:
            texts: The texts to embed.

        Returns:
            One embedding per input text, in input order.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1, or if the
                Volcengine client raises a ``MaasException``.
        """
        if not texts:
            # Nothing to send; also avoids importing the SDK needlessly.
            return []
        if self.chunk_size < 1:
            # A zero step makes range() fail with an unrelated message and a
            # negative step would silently produce no batches at all.
            raise ValueError(
                f"chunk_size must be a positive integer, got {self.chunk_size}"
            )

        # Imported once, outside the try block, so that the except clause
        # below always refers to a bound name.
        from volcengine.maas import MaasException

        embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.chunk_size):
            req = {
                "model": {
                    "name": self.model,
                    "version": self.version,
                },
                "input": texts[start : start + self.chunk_size],
            }
            try:
                resp = self.client.embeddings(req)
            except MaasException as e:
                raise ValueError(f"embed by volcengine Error: {e}") from e
            embeddings.extend(item["embedding"] for item in resp["data"])
        return embeddings