Source code for langchain_community.embeddings.cloudflare_workersai

from typing import Any, Dict, List

import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra

# Workers AI model identifier used when the caller does not supply `model_name`.
DEFAULT_MODEL_NAME = "@cf/baai/bge-base-en-v1.5"


class CloudflareWorkersAIEmbeddings(BaseModel, Embeddings):
    """Cloudflare Workers AI embedding model.

    To use, you need to provide an API token and
    account ID to access Cloudflare Workers AI.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import CloudflareWorkersAIEmbeddings

            account_id = "my_account_id"
            api_token = "my_secret_api_token"
            model_name = "@cf/baai/bge-small-en-v1.5"

            cf = CloudflareWorkersAIEmbeddings(
                account_id=account_id,
                api_token=api_token,
                model_name=model_name
            )
    """

    # Base of the request URL; account id and model name are appended to it.
    api_base_url: str = "https://api.cloudflare.com/client/v4/accounts"
    # Account segment of the request URL.
    account_id: str
    # Token sent as the Bearer credential in the Authorization header.
    api_token: str
    # Model segment of the request URL.
    model_name: str = DEFAULT_MODEL_NAME
    # Maximum number of texts sent in a single request by `embed_documents`.
    batch_size: int = 50
    # When true, "\n" in input texts is replaced by a space before embedding.
    strip_new_lines: bool = True
    # Placeholder only: `__init__` always overwrites this with the real token.
    headers: Dict[str, str] = {"Authorization": "Bearer "}

    def __init__(self, **kwargs: Any):
        """Initialize the Cloudflare Workers AI client.

        Args:
            **kwargs: Field values for the model (see the class attributes).
        """
        super().__init__(**kwargs)

        # Built after validation so the header carries the validated token.
        self.headers = {"Authorization": f"Bearer {self.api_token}"}

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _embed_batch(self, batch: List[str]) -> List[List[float]]:
        """POST one batch of texts to the model endpoint and return its embeddings.

        Args:
            batch: Texts to send in a single request.

        Returns:
            The value found under ``result.data`` in the JSON response.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
        """
        response = requests.post(
            f"{self.api_base_url}/{self.account_id}/ai/run/{self.model_name}",
            headers=self.headers,
            json={"text": batch},
        )
        # Fail with the HTTP status instead of an opaque KeyError/TypeError
        # when the error payload lacks the expected "result" -> "data" path.
        response.raise_for_status()
        return response.json()["result"]["data"]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute document embeddings using Cloudflare Workers AI.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.

        Raises:
            ValueError: If ``batch_size`` is not a positive integer.
            requests.HTTPError: If a request returns an error status.
        """
        # A negative step would make the range below empty and silently
        # return no embeddings at all, so reject it up front.
        if self.batch_size <= 0:
            raise ValueError(
                f"batch_size must be a positive integer, got {self.batch_size}"
            )

        if self.strip_new_lines:
            texts = [text.replace("\n", " ") for text in texts]

        embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.batch_size):
            embeddings.extend(
                self._embed_batch(texts[start : start + self.batch_size])
            )
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Compute a query embedding using Cloudflare Workers AI.

        Args:
            text: The text to embed.

        Returns:
            Embedding for the text.

        Raises:
            requests.HTTPError: If the request returns an error status.
        """
        text = text.replace("\n", " ") if self.strip_new_lines else text
        # The endpoint is called with a one-element batch; unwrap its single result.
        return self._embed_batch([text])[0]