# Source code for langchain_community.llms.petals
import logging
from typing import Any, Dict, List, Mapping, Optional
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Extra, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_community.llms.utils import enforce_stop_tokens
# Module-level logger; Petals.build_extra uses it to warn when an unknown
# keyword argument is moved into ``model_kwargs``.
logger = logging.getLogger(__name__)
class Petals(LLM):
    """Petals Bloom models.

    To use, you should have the ``petals`` python package installed, and the
    environment variable ``HUGGINGFACE_API_KEY`` set with your API key.

    Any parameters that are valid to be passed to the call can be passed
    in, even if not explicitly saved on this class.

    Example:
        .. code-block:: python

            from langchain_community.llms import Petals

            petals = Petals()
    """

    client: Any
    """The client to use for the API calls."""

    tokenizer: Any
    """The tokenizer to use for the API calls."""

    model_name: str = "bigscience/bloom-petals"
    """The model to use."""

    temperature: float = 0.7
    """What sampling temperature to use."""

    max_new_tokens: int = 256
    """The maximum number of new tokens to generate in the completion."""

    top_p: float = 0.9
    """The cumulative probability for top-p sampling."""

    top_k: Optional[int] = None
    """The number of highest-probability vocabulary tokens to keep for
    top-k filtering."""

    do_sample: bool = True
    """Whether to use sampling; greedy decoding is used otherwise."""

    max_length: Optional[int] = None
    """The maximum length of the sequence to be generated."""

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for the `create` call that are not
    explicitly specified."""

    huggingface_api_key: Optional[SecretStr] = None
    """Hugging Face API key. Looked up under ``huggingface_api_key`` in the
    constructor values or the ``HUGGINGFACE_API_KEY`` environment variable."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator(pre=True)
    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in.

        Every key in ``values`` that is not a declared field is moved into
        ``model_kwargs`` and a warning is logged for it.

        Args:
            values: Raw keyword arguments given to the constructor.

        Returns:
            ``values`` with unknown keys relocated under ``"model_kwargs"``.

        Raises:
            ValueError: If an unknown key is also present in ``model_kwargs``.
        """
        all_required_field_names = {field.alias for field in cls.__fields__.values()}
        # Work on a copy so the caller's own ``model_kwargs`` dict is not
        # mutated as a side effect of constructing the model.
        extra = dict(values.get("model_kwargs", {}))
        # Iterate over a snapshot of the keys because entries are popped below.
        for field_name in list(values):
            if field_name in all_required_field_names:
                continue
            if field_name in extra:
                raise ValueError(f"Found {field_name} supplied twice.")
            logger.warning(
                f"WARNING! {field_name} is not default parameter.\n"
                f"{field_name} was transferred to model_kwargs.\n"
                f"Please confirm that {field_name} is what you intended."
            )
            extra[field_name] = values.pop(field_name)
        values["model_kwargs"] = extra
        return values

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key and required python packages are available.

        Resolves the Hugging Face API key, imports ``petals`` and
        ``transformers``, and populates ``tokenizer`` and ``client`` by calling
        ``from_pretrained`` with the configured ``model_name``.

        Args:
            values: Field values collected so far.

        Returns:
            ``values`` with ``tokenizer``, ``client`` and
            ``huggingface_api_key`` set.

        Raises:
            ImportError: If ``transformers`` or ``petals`` cannot be imported.
        """
        # The key is resolved first, so a lookup failure surfaces before any
        # import or model loading is attempted.
        huggingface_api_key = convert_to_secret_str(
            get_from_dict_or_env(values, "huggingface_api_key", "HUGGINGFACE_API_KEY")
        )
        # Only the imports live inside ``try``: an ImportError raised while
        # loading the model should not be reported as a missing package.
        try:
            from petals import AutoDistributedModelForCausalLM
            from transformers import AutoTokenizer
        except ImportError as err:
            raise ImportError(
                "Could not import transformers or petals python package. "
                "Please install with `pip install -U transformers petals`."
            ) from err

        model_name = values["model_name"]
        values["tokenizer"] = AutoTokenizer.from_pretrained(model_name)
        values["client"] = AutoDistributedModelForCausalLM.from_pretrained(
            model_name
        )
        # Keep the wrapped secret, matching the field's declared type, instead
        # of unwrapping it back into a plain string.
        values["huggingface_api_key"] = huggingface_api_key
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling the Petals API.

        Entries in ``model_kwargs`` override the explicitly declared
        generation fields when the same key appears in both.
        """
        normal_params = {
            "temperature": self.temperature,
            "max_new_tokens": self.max_new_tokens,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "do_sample": self.do_sample,
            "max_length": self.max_length,
        }
        return {**normal_params, **self.model_kwargs}

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters: model name plus default params."""
        return {"model_name": self.model_name, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return the type of llm."""
        return "petals"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the Petals API.

        Args:
            prompt: Text to tokenize and pass to the model.
            stop: Optional stop sequences applied to the decoded text.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Per-call generation parameters; these override the
                instance defaults.

        Returns:
            The decoded text of the first generated sequence, post-processed
            by ``enforce_stop_tokens`` when ``stop`` is given.
        """
        params = {**self._default_params, **kwargs}
        inputs = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
        outputs = self.client.generate(inputs, **params)
        text = self.tokenizer.decode(outputs[0])
        if stop is not None:
            # Stop sequences are not part of the generation parameters, so
            # they are applied to the decoded text afterwards.
            text = enforce_stop_tokens(text, stop)
        return text