Source code for langchain_community.llms.petals

import logging
from typing import Any, Dict, List, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Extra, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env

from langchain_community.llms.utils import enforce_stop_tokens

logger = logging.getLogger(__name__)


class Petals(LLM):
    """Petals Bloom models.

    To use, you should have the ``petals`` python package installed, and the
    environment variable ``HUGGINGFACE_API_KEY`` set with your API key.

    Any parameters that are valid to be passed to the call can be passed
    in, even if not explicitly saved on this class.

    Example:
        .. code-block:: python

            from langchain_community.llms import Petals

            petals = Petals()
    """

    client: Any
    """The client to use for the API calls."""

    tokenizer: Any
    """The tokenizer to use for the API calls."""

    model_name: str = "bigscience/bloom-petals"
    """The model to use."""

    temperature: float = 0.7
    """What sampling temperature to use."""

    max_new_tokens: int = 256
    """The maximum number of new tokens to generate in the completion."""

    top_p: float = 0.9
    """The cumulative probability for top-p sampling."""

    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens
    to keep for top-k-filtering."""

    do_sample: bool = True
    """Whether or not to use sampling; use greedy decoding otherwise."""

    max_length: Optional[int] = None
    """The maximum length of the sequence to be generated."""

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call
    not explicitly specified."""

    huggingface_api_key: Optional[SecretStr] = None
    """API key; looked up in ``HUGGINGFACE_API_KEY`` when not passed in."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator(pre=True)
    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in.

        Any key in ``values`` that is not a declared field alias is moved
        into ``model_kwargs`` (with a warning).

        Args:
            values: Raw keyword arguments given to the constructor.

        Returns:
            ``values`` with unknown keys relocated under ``model_kwargs``.

        Raises:
            ValueError: If an unknown key is also present in ``model_kwargs``.
        """
        all_required_field_names = {field.alias for field in cls.__fields__.values()}

        # Work on a copy so the dict object supplied by the caller is not
        # mutated as a side effect of constructing the model.
        extra = dict(values.get("model_kwargs", {}))

        # Iterate over a snapshot of the keys: entries are popped in the loop.
        for field_name in list(values):
            if field_name in all_required_field_names:
                continue
            if field_name in extra:
                raise ValueError(f"Found {field_name} supplied twice.")
            logger.warning(
                f"WARNING! {field_name} is not default parameter. "
                f"{field_name} was transferred to model_kwargs. \n"
                f"Please confirm that {field_name} is what you intended."
            )
            extra[field_name] = values.pop(field_name)

        values["model_kwargs"] = extra
        return values

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Resolves the API key from ``values`` or ``HUGGINGFACE_API_KEY``,
        imports ``petals`` and ``transformers``, and populates ``tokenizer``
        and ``client`` from ``model_name``.

        Args:
            values: Field values collected so far.

        Returns:
            ``values`` with ``tokenizer``, ``client`` and
            ``huggingface_api_key`` filled in.

        Raises:
            ImportError: If ``petals`` or ``transformers`` cannot be imported.
        """
        huggingface_api_key = convert_to_secret_str(
            get_from_dict_or_env(values, "huggingface_api_key", "HUGGINGFACE_API_KEY")
        )

        # Only the imports are guarded: an ImportError raised later while
        # loading the model must not be reported as a missing package.
        try:
            from petals import AutoDistributedModelForCausalLM
            from transformers import AutoTokenizer
        except ImportError as err:
            raise ImportError(
                "Could not import transformers or petals python package. "
                "Please install with `pip install -U transformers petals`."
            ) from err

        model_name = values["model_name"]
        values["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
        values["client"] = AutoDistributedModelForCausalLM.from_pretrained(model_name)

        # Keep the key wrapped as a SecretStr (the declared field type) so it
        # is not stored on the model as a plain string.
        values["huggingface_api_key"] = huggingface_api_key
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling Petals API.

        Entries in ``model_kwargs`` override the explicitly declared fields.
        """
        normal_params = {
            "temperature": self.temperature,
            "max_new_tokens": self.max_new_tokens,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "do_sample": self.do_sample,
            "max_length": self.max_length,
        }
        return {**normal_params, **self.model_kwargs}

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"model_name": self.model_name, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "petals"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the Petals API.

        Args:
            prompt: Text to tokenize and pass to the model.
            stop: Optional stop sequences applied to the decoded text.
            run_manager: Callback manager for the run; not used here.
            **kwargs: Per-call generation parameters; these override the
                defaults from ``_default_params``.

        Returns:
            The decoded first output sequence, cut at ``stop`` when given.
        """
        params = {**self._default_params, **kwargs}
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
        outputs = self.client.generate(inputs, **params)
        text = self.tokenizer.decode(outputs[0])
        if stop is not None:
            # Stop sequences are not passed to the model as generation
            # parameters, so they are enforced on the decoded text instead.
            text = enforce_stop_tokens(text, stop)
        return text