Source code for langchain_community.llms.vllm
from typing import Any, Dict, List, Optional
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import BaseLLM
from langchain_core.outputs import Generation, LLMResult
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_community.llms.openai import BaseOpenAI
from langchain_community.utils.openai import is_openai_v1
class VLLM(BaseLLM):
    """VLLM language model.

    Wraps an in-process ``vllm.LLM`` engine, which is created during
    validation and stored on ``client``.
    """

    model: str = ""
    """The name or path of a HuggingFace Transformers model."""

    tensor_parallel_size: Optional[int] = 1
    """The number of GPUs to use for distributed execution with tensor
    parallelism."""

    trust_remote_code: Optional[bool] = False
    """Trust remote code (e.g., from HuggingFace) when downloading the model
    and tokenizer."""

    n: int = 1
    """Number of output sequences to return for the given prompt."""

    best_of: Optional[int] = None
    """Number of output sequences that are generated from the prompt."""

    presence_penalty: float = 0.0
    """Float that penalizes new tokens based on whether they appear in the
    generated text so far."""

    frequency_penalty: float = 0.0
    """Float that penalizes new tokens based on their frequency in the
    generated text so far."""

    temperature: float = 1.0
    """Float that controls the randomness of the sampling."""

    top_p: float = 1.0
    """Float that controls the cumulative probability of the top tokens to
    consider."""

    top_k: int = -1
    """Integer that controls the number of top tokens to consider."""

    use_beam_search: bool = False
    """Whether to use beam search instead of sampling."""

    stop: Optional[List[str]] = None
    """List of strings that stop the generation when they are generated."""

    ignore_eos: bool = False
    """Whether to ignore the EOS token and continue generating tokens after
    the EOS token is generated."""

    max_new_tokens: int = 512
    """Maximum number of tokens to generate per output sequence."""

    logprobs: Optional[int] = None
    """Number of log probabilities to return per output token."""

    dtype: str = "auto"
    """The data type for the model weights and activations."""

    download_dir: Optional[str] = None
    """Directory to download and load the weights. (Defaults to the default
    cache dir of huggingface.)"""

    vllm_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for the `vllm.LLM` call that are not
    explicitly specified."""

    client: Any  #: :meta private:

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the ``vllm`` package is importable and build the client.

        Args:
            values: The field values collected for this model.

        Returns:
            The same mapping with ``values["client"]`` set to a ``vllm.LLM``
            instance constructed from the model-level fields and
            ``vllm_kwargs``.

        Raises:
            ImportError: If the ``vllm`` package cannot be imported.
        """
        try:
            from vllm import LLM as VLLModel
        except ImportError as e:
            # Chain the original error so the underlying import failure
            # remains visible in the traceback.
            raise ImportError(
                "Could not import vllm python package. "
                "Please install it with `pip install vllm`."
            ) from e

        values["client"] = VLLModel(
            model=values["model"],
            tensor_parallel_size=values["tensor_parallel_size"],
            trust_remote_code=values["trust_remote_code"],
            dtype=values["dtype"],
            download_dir=values["download_dir"],
            **values["vllm_kwargs"],
        )

        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default sampling parameters for calling vllm."""
        return {
            "n": self.n,
            "best_of": self.best_of,
            "max_tokens": self.max_new_tokens,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "temperature": self.temperature,
            "presence_penalty": self.presence_penalty,
            "frequency_penalty": self.frequency_penalty,
            "stop": self.stop,
            "ignore_eos": self.ignore_eos,
            "use_beam_search": self.use_beam_search,
            "logprobs": self.logprobs,
        }

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts and input.

        Args:
            prompts: The prompts to generate completions for.
            stop: Stop strings for this call. When ``None``, the instance-level
                ``stop`` field (from ``_default_params``) is used instead.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra sampling parameters; these override the defaults
                from ``_default_params``.

        Returns:
            An ``LLMResult`` holding, for each prompt, one ``Generation`` per
            completion returned by the client.
        """
        from vllm import SamplingParams

        # Build sampling parameters: instance defaults first, then per-call
        # overrides.
        params = {**self._default_params, **kwargs}
        # Only override the configured stop strings when the caller actually
        # supplied some; otherwise ``self.stop`` would be silently discarded.
        if stop is not None:
            params["stop"] = stop
        sampling_params = SamplingParams(**params)

        # Call the model.
        outputs = self.client.generate(prompts, sampling_params)

        # Keep every completion for each prompt so that ``n > 1`` is honoured;
        # the first entry is the same one that was previously returned alone.
        generations = [
            [Generation(text=completion.text) for completion in output.outputs]
            for output in outputs
        ]

        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "vllm"
class VLLMOpenAI(BaseOpenAI):
    """vLLM OpenAI-compatible API client."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Always return ``False`` for this class."""
        return False

    @property
    def _invocation_params(self) -> Dict[str, Any]:
        """Get the parameters used to invoke the model.

        Starts from the model name, layers the default parameters on top,
        and forces ``logit_bias`` to ``None``. When ``is_openai_v1()`` is
        false, the API key and API base are included as well.
        """
        invocation: Dict[str, Any] = {"model": self.model_name}
        invocation.update(self._default_params)
        invocation["logit_bias"] = None

        if is_openai_v1():
            return invocation

        # Pre-v1 path: credentials travel with the call parameters.
        invocation["api_key"] = self.openai_api_key
        invocation["api_base"] = self.openai_api_base
        return invocation

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "vllm-openai"