Source code for langchain_community.embeddings.volcengine
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain_core.utils import get_from_dict_or_env
logger = logging.getLogger(__name__)
class VolcanoEmbeddings(BaseModel, Embeddings):
    """Embedding model backed by the Volcengine MaaS embeddings endpoint.

    Credentials come from the ``volcano_ak`` / ``volcano_sk`` fields or, when
    those are not supplied, are looked up under ``VOLC_ACCESSKEY`` /
    ``VOLC_SECRETKEY`` via ``get_from_dict_or_env``.
    """

    volcano_ak: Optional[str] = None
    """Volcengine access key.
    See https://www.volcengine.com/docs/6459/76491#ak-sk"""

    volcano_sk: Optional[str] = None
    """Volcengine secret key.
    See https://www.volcengine.com/docs/6459/76491#ak-sk"""

    host: str = "maas-api.ml-platform-cn-beijing.volces.com"
    """Service host passed to the Volcengine client.
    See https://www.volcengine.com/docs/82379/1174746"""

    region: str = "cn-beijing"
    """Service region passed to the Volcengine client.
    See https://www.volcengine.com/docs/82379/1174746"""

    model: str = "bge-large-zh"
    """Name of the embedding model sent with every request.
    Available names are listed at https://www.volcengine.com/docs/82379/1174746"""

    version: str = "1.0"
    """Version of the embedding model sent with every request."""

    chunk_size: int = 100
    """Maximum number of texts sent in a single embeddings request."""

    client: Any
    """Volcengine client; populated by ``validate_environment``."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the credentials and build the Volcengine client.

        Args:
            values: Field values of the model being validated.

        Returns:
            ``values`` with ``volcano_ak``, ``volcano_sk`` and ``client``
            filled in.

        Raises:
            ImportError: If the ``volcengine`` package cannot be imported.
        """
        # NOTE(review): get_from_dict_or_env is expected to raise when the key
        # is neither in ``values`` nor in the environment -- confirm against
        # the langchain_core version in use.
        values["volcano_ak"] = get_from_dict_or_env(
            values,
            "volcano_ak",
            "VOLC_ACCESSKEY",
        )
        values["volcano_sk"] = get_from_dict_or_env(
            values,
            "volcano_sk",
            "VOLC_SECRETKEY",
        )

        # Guard only the import: an ImportError raised while constructing or
        # configuring the client must not be reported as "package not found".
        try:
            from volcengine.maas import MaasService
        except ImportError as e:
            raise ImportError(
                "volcengine package not found, please install it with "
                "`pip install volcengine`"
            ) from e

        client = MaasService(values["host"], values["region"])
        client.set_ak(values["volcano_ak"])
        client.set_sk(values["volcano_sk"])
        values["client"] = client
        return values

    def embed_query(self, text: str) -> List[float]:
        """Embed a single text.

        Delegates to ``embed_documents`` with a one-element list and returns
        the first embedding of the result.

        Args:
            text: The text to embed.

        Returns:
            The embedding of ``text``.
        """
        return self.embed_documents([text])[0]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts, sending them in batches of ``chunk_size``.

        Args:
            texts: The texts to embed.

        Returns:
            One embedding per returned item, concatenated across batches in
            request order.

        Raises:
            ValueError: If ``chunk_size`` is not positive, or if the client
                raises ``MaasException`` while embedding a batch.
        """
        # A zero step already made ``range`` raise ValueError; a negative one
        # used to yield no batches and silently drop every input.
        if self.chunk_size <= 0:
            raise ValueError(
                f"chunk_size must be a positive integer, got {self.chunk_size}"
            )
        if not texts:
            return []

        # Imported once and outside the ``try``: a failed import inside the
        # guarded body would leave ``MaasException`` unbound when the
        # ``except`` clause is evaluated.
        from volcengine.maas import MaasException

        embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.chunk_size):
            req = {
                "model": {
                    "name": self.model,
                    "version": self.version,
                },
                "input": texts[start : start + self.chunk_size],
            }
            try:
                resp = self.client.embeddings(req)
            except MaasException as e:
                raise ValueError(f"embed by volcengine Error: {e}") from e
            embeddings.extend(res["embedding"] for res in resp["data"])
        return embeddings