Source code for langchain_community.retrievers.arcee

from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import Extra, SecretStr, root_validator
from langchain_core.retrievers import BaseRetriever
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env

from langchain_community.utilities.arcee import ArceeWrapper, DALMFilter


class ArceeRetriever(BaseRetriever):
    """Retriever for Arcee's Domain Adapted Language Models (DALMs).

    To use, set the ``ARCEE_API_KEY`` environment variable with your Arcee API
    key, or pass ``arcee_api_key`` as a named parameter.

    Example:
        .. code-block:: python

            from langchain_community.retrievers import ArceeRetriever

            retriever = ArceeRetriever(
                model="DALM-PubMed",
                arcee_api_key="ARCEE-API-KEY"
            )

            documents = retriever.invoke("AI-driven music therapy")
    """

    _client: Optional[ArceeWrapper] = None  #: :meta private:
    """Arcee client."""

    arcee_api_key: SecretStr
    """Arcee API key."""

    model: str
    """Arcee DALM name."""

    arcee_api_url: str = "https://api.arcee.ai"
    """Arcee API URL."""

    arcee_api_version: str = "v2"
    """Arcee API version."""

    arcee_app_url: str = "https://app.arcee.ai"
    """Arcee app URL."""

    model_kwargs: Optional[Dict[str, Any]] = None
    """Keyword arguments to pass to the model."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        underscore_attrs_are_private = True

    def __init__(self, **data: Any) -> None:
        """Validate the fields, then build the private Arcee client.

        Args:
            **data: Field values for the retriever (see the class attributes).
        """
        super().__init__(**data)

        # The wrapper needs the raw key, so the SecretStr is unwrapped here.
        self._client = ArceeWrapper(
            arcee_api_key=self.arcee_api_key.get_secret_value(),
            arcee_api_url=self.arcee_api_url,
            arcee_api_version=self.arcee_api_version,
            model_kwargs=self.model_kwargs,
            model_name=self.model,
        )

        self._client.validate_model_training_status()

    @root_validator()
    def validate_environments(cls, values: Dict) -> Dict:
        """Validate Arcee environment variables and ``model_kwargs``.

        Each connection setting is taken from ``values`` when present and
        otherwise looked up in the matching ``ARCEE_*`` environment variable.
        ``model_kwargs`` is then checked: ``size`` must not be negative and
        ``filters`` must be a list whose items can build a ``DALMFilter``.

        Args:
            values: Field values collected by pydantic.

        Returns:
            The same mapping, with the API key converted to a secret string.

        Raises:
            ValueError: If ``size`` is negative or ``filters`` is not a list.
        """
        # NOTE(review): this validator runs after field validation, so a
        # missing required `arcee_api_key` has presumably already been reported
        # before the environment lookup below - confirm that the
        # ARCEE_API_KEY fallback behaves as the class docstring describes.
        values["arcee_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(
                values,
                "arcee_api_key",
                "ARCEE_API_KEY",
            )
        )

        values["arcee_api_url"] = get_from_dict_or_env(
            values,
            "arcee_api_url",
            "ARCEE_API_URL",
        )

        values["arcee_app_url"] = get_from_dict_or_env(
            values,
            "arcee_app_url",
            "ARCEE_APP_URL",
        )

        values["arcee_api_version"] = get_from_dict_or_env(
            values,
            "arcee_api_version",
            "ARCEE_API_VERSION",
        )

        # `.get` instead of indexing: a field that already failed its own
        # validation is absent from `values`, and indexing would raise a bare
        # KeyError instead of a validation error.
        model_kwargs = values.get("model_kwargs")
        if model_kwargs:
            size = model_kwargs.get("size")
            if size is not None and not size >= 0:
                raise ValueError("`size` must not be negative.")

            filters = model_kwargs.get("filters")
            if filters is not None:
                if not isinstance(filters, list):
                    raise ValueError("`filters` must be a list.")
                # Constructing each filter is the validation; the instances
                # themselves are discarded.
                for filter_spec in filters:
                    DALMFilter(**filter_spec)

        return values

    def _get_relevant_documents(
        self, query: str, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any
    ) -> List[Document]:
        """Retrieve {size} contexts for the given query using the retriever.

        Args:
            query: Query to submit to the model.
            run_manager: Callback manager for this retriever run (not used
                by this method).
            **kwargs: Forwarded unchanged to the client's ``retrieve`` call.
                The original documentation lists ``size`` (maximum number of
                context results to retrieve, defaults to 3, possibly fewer
                when filters are provided) and ``filters`` (filters to apply
                to the context dataset).

        Returns:
            Whatever the client's ``retrieve`` call returns.

        Raises:
            ValueError: If the client is not initialized or retrieval fails;
                the underlying exception is chained as the cause.
        """
        try:
            if not self._client:
                raise ValueError("Client is not initialized.")
            return self._client.retrieve(query=query, **kwargs)
        except Exception as e:
            raise ValueError(f"Error while retrieving documents: {e}") from e