from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import get_pydantic_field_names
from langchain_core.utils.utils import build_extra_kwargs
logger = logging.getLogger(__name__)
class LlamaCpp(LLM):
"""llama.cpp model.
To use, you should have the llama-cpp-python library installed, and provide the
path to the Llama model as a named parameter to the constructor.
Check out: https://github.com/abetlen/llama-cpp-python
Example:
.. code-block:: python
from langchain_community.llms import LlamaCpp
llm = LlamaCpp(model_path="/path/to/llama/model")"""
client: Any #: :meta private:
model_path: str
"""Llama模型文件的路径。"""
lora_base: Optional[str] = None
"""Llama LoRA基础模型的路径。"""
lora_path: Optional[str] = None
"""Llama LoRA的路径。如果为None,则没有加载LoRa。"""
n_ctx: int = Field(512, alias="n_ctx")
"""标记上下文窗口。"""
n_parts: int = Field(-1, alias="n_parts")
"""模型分割成的部分数量。
如果为-1,则自动确定部分的数量。"""
seed: int = Field(-1, alias="seed")
"""种子。如果为-1,则使用随机种子。"""
f16_kv: bool = Field(True, alias="f16_kv")
"""为键/值缓存使用半精度。"""
logits_all: bool = Field(False, alias="logits_all")
"""返回所有标记的logits,而不仅仅是最后一个标记。"""
vocab_only: bool = Field(False, alias="vocab_only")
"""只加载词汇表,不加载权重。"""
use_mlock: bool = Field(False, alias="use_mlock")
"""强制系统将模型保留在内存中。"""
n_threads: Optional[int] = Field(None, alias="n_threads")
"""线程数目。
如果为None,则线程数目会自动确定。"""
n_batch: Optional[int] = Field(8, alias="n_batch")
"""并行处理的令牌数量。
应该是介于1和n_ctx之间的数字。"""
n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
"""要加载到GPU内存中的层数。默认值为None。"""
suffix: Optional[str] = Field(None)
"""要附加到生成文本的后缀。如果为None,则不附加后缀。"""
max_tokens: Optional[int] = 256
"""生成的最大令牌数量。"""
temperature: Optional[float] = 0.8
"""用于采样的温度。"""
top_p: Optional[float] = 0.95
"""用于抽样的顶部p值。"""
logprobs: Optional[int] = Field(None)
"""要返回的对数概率数量。如果为None,则不返回对数概率。"""
echo: Optional[bool] = False
"""是否回显提示符。"""
stop: Optional[List[str]] = []
"""遇到时停止生成的字符串列表。"""
repeat_penalty: Optional[float] = 1.1
"""重复标记的惩罚。"""
top_k: Optional[int] = 40
"""用于采样的前k个值。"""
last_n_tokens_size: Optional[int] = 64
"""在应用重复惩罚时要查看的标记数量。"""
use_mmap: Optional[bool] = True
"""是否将模型保留在内存中"""
rope_freq_scale: float = 1.0
"""绳索采样的比例因子。"""
rope_freq_base: float = 10000.0
"""用于绳索采样的基本频率。"""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""传递给llama_cpp.Llama的任何额外参数。"""
streaming: bool = True
"""是否逐个标记流式传输结果。"""
grammar_path: Optional[Union[str, Path]] = None
""" grammar_path: 指向定义形式语法的 .gbnf 文件的路径,用于约束模型输出。例如,可以使用语法来强制模型生成有效的 JSON 或者只使用表情符号进行语音输出。最多只能传入 grammar_path 和 grammar 中的一个。"""
grammar: Optional[Union[str, Any]] = None
"""
语法:用于约束模型输出的形式语法。例如,可以使用语法来强制模型生成有效的JSON,或者只使用表情符号进行语音输出。最多只能传入 grammar_path 和 grammar 中的一个。"""
verbose: bool = True
"""将详细输出打印到stderr。"""
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""验证llama-cpp-python库是否已安装。"""
try:
from llama_cpp import Llama, LlamaGrammar
except ImportError:
raise ImportError(
"Could not import llama-cpp-python library. "
"Please install the llama-cpp-python library to "
"use this embedding model: pip install llama-cpp-python"
)
model_path = values["model_path"]
model_param_names = [
"rope_freq_scale",
"rope_freq_base",
"lora_path",
"lora_base",
"n_ctx",
"n_parts",
"seed",
"f16_kv",
"logits_all",
"vocab_only",
"use_mlock",
"n_threads",
"n_batch",
"use_mmap",
"last_n_tokens_size",
"verbose",
]
model_params = {k: values[k] for k in model_param_names}
# For backwards compatibility, only include if non-null.
if values["n_gpu_layers"] is not None:
model_params["n_gpu_layers"] = values["n_gpu_layers"]
model_params.update(values["model_kwargs"])
try:
values["client"] = Llama(model_path, **model_params)
except Exception as e:
raise ValueError(
f"Could not load Llama model from path: {model_path}. "
f"Received error {e}"
)
if values["grammar"] and values["grammar_path"]:
grammar = values["grammar"]
grammar_path = values["grammar_path"]
raise ValueError(
"Can only pass in one of grammar and grammar_path. Received "
f"{grammar=} and {grammar_path=}."
)
elif isinstance(values["grammar"], str):
values["grammar"] = LlamaGrammar.from_string(values["grammar"])
elif values["grammar_path"]:
values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
else:
pass
return values
@root_validator(pre=True)
def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""从传入的额外参数构建额外的kwargs。"""
all_required_field_names = get_pydantic_field_names(cls)
extra = values.get("model_kwargs", {})
values["model_kwargs"] = build_extra_kwargs(
extra, values, all_required_field_names
)
return values
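# NOTE (illustrative): because this validator runs with pre=True, any constructor
# kwarg that does not match a declared field is folded into model_kwargs here and
# then forwarded verbatim to llama_cpp.Llama by validate_environment. For example,
# a hypothetical extra kwarg like LlamaCpp(model_path=..., main_gpu=1) would reach
# Llama as main_gpu=1.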
@property
def _default_params(self) -> Dict[str, Any]:
"""获取调用llama_cpp的默认参数。"""
params = {
"suffix": self.suffix,
"max_tokens": self.max_tokens,
"temperature": self.temperature,
"top_p": self.top_p,
"logprobs": self.logprobs,
"echo": self.echo,
"stop_sequences": self.stop, # key here is convention among LLM classes
"repeat_penalty": self.repeat_penalty,
"top_k": self.top_k,
}
if self.grammar:
params["grammar"] = self.grammar
return params
@property
def _identifying_params(self) -> Dict[str, Any]:
"""获取识别参数。"""
return {**{"model_path": self.model_path}, **self._default_params}
@property
def _llm_type(self) -> str:
"""llm的返回类型。"""
return "llamacpp"
def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
"""执行健全性检查,准备 llama_cpp 需要的参数格式。
参数:
stop (Optional[List[str]]): llama_cpp 的停止序列列表。
返回:
包含合并参数的字典。
"""
# Raise error if stop sequences are in both input and default params
if self.stop and stop is not None:
raise ValueError("`stop` found in both the input and default params.")
params = self._default_params
# llama_cpp expects the "stop" key not this, so we remove it:
params.pop("stop_sequences")
# then set it to the configured value, or default to an empty list:
params["stop"] = self.stop or stop or []
return params
def _call(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""调用Llama模型并返回输出。
参数:
prompt: 用于生成的提示。
stop: 遇到时停止生成的字符串列表。
返回:
生成的文本。
示例:
.. code-block:: python
from langchain_community.llms import LlamaCpp
llm = LlamaCpp(model_path="/path/to/local/llama/model.bin")
llm.invoke("This is a prompt.")
"""
if self.streaming:
# If streaming is enabled, we use the stream
# method that yields as they are generated
# and return the combined strings from the first choice's text:
combined_text_output = ""
for chunk in self._stream(
prompt=prompt,
stop=stop,
run_manager=run_manager,
**kwargs,
):
combined_text_output += chunk.text
return combined_text_output
else:
params = self._get_parameters(stop)
params = {**params, **kwargs}
result = self.client(prompt=prompt, **params)
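# llama-cpp-python returns an OpenAI-style completion dict; the generated
# text for the single completion lives under result["choices"][0]["text"].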
return result["choices"][0]["text"]
def _stream(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
"""以实时生成的结果对象作为生成器。
它还使用类似于OpenAI LLM类同名方法的参数调用回调管理器的on_llm_new_token事件。
参数:
prompt:传递给模型的提示。
stop:生成时使用的可选停止词列表。
返回:
表示正在生成的令牌流的生成器。
生成:
类似于包含字符串令牌和元数据的对象的字典。
有关更多信息,请参阅llama-cpp-python文档和下面的内容。
示例:
.. code-block:: python
from langchain_community.llms import LlamaCpp
llm = LlamaCpp(
model_path="/path/to/local/model.bin",
temperature = 0.5
)
for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
stop=["'","
"]):
result = chunk["choices"][0]
print(result["text"], end='', flush=True) # noqa: T201
"""
params = {**self._get_parameters(stop), **kwargs}
result = self.client(prompt=prompt, stream=True, **params)
for part in result:
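# Each streamed part uses the same OpenAI-style chunk layout: the token text
# and optional logprobs sit under part["choices"][0].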
logprobs = part["choices"][0].get("logprobs", None)
chunk = GenerationChunk(
text=part["choices"][0]["text"],
generation_info={"logprobs": logprobs},
)
if run_manager:
run_manager.on_llm_new_token(
token=chunk.text, verbose=self.verbose, log_probs=logprobs
)
yield chunk
def get_num_tokens(self, text: str) -> int:
"""Return the number of tokens in the text, using the underlying llama.cpp tokenizer."""
tokenized_text = self.client.tokenize(text.encode("utf-8"))
return len(tokenized_text)
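# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal, hedged example of driving this class end to end: construct the LLM,
# stream a completion token by token, then count prompt tokens. The model path and
# sampling values below are placeholders; point model_path at a real local GGUF file.
if __name__ == "__main__":
    llm = LlamaCpp(
        model_path="/path/to/local/model.gguf",  # hypothetical path
        n_ctx=2048,
        temperature=0.7,
        max_tokens=128,
        verbose=False,
    )
    prompt = "Name three planets in the solar system:"
    # For LLM subclasses, .stream() yields plain strings, so tokens print directly.
    for token in llm.stream(prompt):
        print(token, end="", flush=True)
    print()
    # Token counting goes through the model's own tokenizer via get_num_tokens.
    print("prompt tokens:", llm.get_num_tokens(prompt))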