Source code for langchain_community.llms.llamacpp

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import get_pydantic_field_names
from langchain_core.utils.utils import build_extra_kwargs

logger = logging.getLogger(__name__)


class LlamaCpp(LLM):
    """llama.cpp model.

    To use, you should have the llama-cpp-python library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out: https://github.com/abetlen/llama-cpp-python

    Example:
        .. code-block:: python

            from langchain_community.llms import LlamaCpp
            llm = LlamaCpp(model_path="/path/to/llama/model")
    """

    client: Any  #: :meta private:

    model_path: str
    """The path to the Llama model file."""

    lora_base: Optional[str] = None
    """The path to the Llama LoRA base model."""

    lora_path: Optional[str] = None
    """The path to the Llama LoRA. If None, no LoRA is loaded."""

    n_ctx: int = Field(512, alias="n_ctx")
    """Token context window."""

    n_parts: int = Field(-1, alias="n_parts")
    """Number of parts to split the model into.
    If -1, the number of parts is automatically determined."""

    seed: int = Field(-1, alias="seed")
    """Seed. If -1, a random seed is used."""

    f16_kv: bool = Field(True, alias="f16_kv")
    """Use half-precision for the key/value cache."""

    logits_all: bool = Field(False, alias="logits_all")
    """Return logits for all tokens, not just the last token."""

    vocab_only: bool = Field(False, alias="vocab_only")
    """Only load the vocabulary, not the weights."""

    use_mlock: bool = Field(False, alias="use_mlock")
    """Force the system to keep the model in RAM."""

    n_threads: Optional[int] = Field(None, alias="n_threads")
    """Number of threads to use.
    If None, the number of threads is determined automatically."""

    n_batch: Optional[int] = Field(8, alias="n_batch")
    """Number of tokens to process in parallel.
    Should be a number between 1 and n_ctx."""

    n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
    """Number of layers to load into GPU memory. Default: None."""

    suffix: Optional[str] = Field(None)
    """A suffix to append to the generated text. If None, no suffix is appended."""

    max_tokens: Optional[int] = 256
    """The maximum number of tokens to generate."""

    temperature: Optional[float] = 0.8
    """The temperature to use for sampling."""

    top_p: Optional[float] = 0.95
    """The top-p value to use for sampling."""

    logprobs: Optional[int] = Field(None)
    """The number of logprobs to return. If None, no logprobs are returned."""

    echo: Optional[bool] = False
    """Whether to echo the prompt."""

    stop: Optional[List[str]] = []
    """A list of strings that stop generation when encountered."""

    repeat_penalty: Optional[float] = 1.1
    """The penalty to apply to repeated tokens."""

    top_k: Optional[int] = 40
    """The top-k value to use for sampling."""

    last_n_tokens_size: Optional[int] = 64
    """The number of tokens to look back at when applying the repeat penalty."""

    use_mmap: Optional[bool] = True
    """Whether to keep the model loaded in RAM."""

    rope_freq_scale: float = 1.0
    """Scale factor for RoPE sampling."""

    rope_freq_base: float = 10000.0
    """Base frequency for RoPE sampling."""

    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Any additional parameters to pass to llama_cpp.Llama."""

    streaming: bool = True
    """Whether to stream the results, token by token."""

    grammar_path: Optional[Union[str, Path]] = None
    """
    grammar_path: Path to a .gbnf file that defines a formal grammar for
    constraining model output. For instance, a grammar can be used to force the
    model to generate valid JSON or to speak exclusively in emojis. At most one
    of grammar_path and grammar may be passed in.
    """
    grammar: Optional[Union[str, Any]] = None
    """
    grammar: A formal grammar for constraining model output. For instance, a
    grammar can be used to force the model to generate valid JSON or to speak
    exclusively in emojis. At most one of grammar_path and grammar may be
    passed in.
    """

    verbose: bool = True
    """Print verbose output to stderr."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the llama-cpp-python library is installed."""
        try:
            from llama_cpp import Llama, LlamaGrammar
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this model: pip install llama-cpp-python"
            )

        model_path = values["model_path"]
        model_param_names = [
            "rope_freq_scale",
            "rope_freq_base",
            "lora_path",
            "lora_base",
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
            "use_mmap",
            "last_n_tokens_size",
            "verbose",
        ]
        model_params = {k: values[k] for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if values["n_gpu_layers"] is not None:
            model_params["n_gpu_layers"] = values["n_gpu_layers"]

        model_params.update(values["model_kwargs"])

        try:
            values["client"] = Llama(model_path, **model_params)
        except Exception as e:
            raise ValueError(
                f"Could not load Llama model from path: {model_path}. "
                f"Received error {e}"
            )

        if values["grammar"] and values["grammar_path"]:
            grammar = values["grammar"]
            grammar_path = values["grammar_path"]
            raise ValueError(
                "Can only pass in one of grammar and grammar_path. Received "
                f"{grammar=} and {grammar_path=}."
            )
        elif isinstance(values["grammar"], str):
            values["grammar"] = LlamaGrammar.from_string(values["grammar"])
        elif values["grammar_path"]:
            values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
        else:
            pass

        return values

    @root_validator(pre=True)
    def build_model_kwargs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
        values["model_kwargs"] = build_extra_kwargs(
            extra, values, all_required_field_names
        )
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling llama_cpp."""
        params = {
            "suffix": self.suffix,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "logprobs": self.logprobs,
            "echo": self.echo,
            "stop_sequences": self.stop,  # key here is convention among LLM classes
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
        }
        if self.grammar:
            params["grammar"] = self.grammar
        return params

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {**{"model_path": self.model_path}, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "llamacpp"

    def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Perform a sanity check and prepare parameters in the format llama_cpp expects.

        Args:
            stop (Optional[List[str]]): List of stop sequences for llama_cpp.

        Returns:
            Dictionary containing the combined parameters.
        """
        # Raise error if stop sequences are in both input and default params
        if self.stop and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")

        params = self._default_params

        # llama_cpp expects the "stop" key, not "stop_sequences", so we remove it:
        params.pop("stop_sequences")

        # then set it as configured, or default to an empty list:
        params["stop"] = self.stop or stop or []

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the Llama model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings that stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import LlamaCpp
                llm = LlamaCpp(model_path="/path/to/local/llama/model.bin")
                llm.invoke("This is a prompt.")
        """
        if self.streaming:
            # If streaming is enabled, we use the stream
            # method that yields chunks as they are generated
            # and return the combined text from each chunk's first choice:
            combined_text_output = ""
            for chunk in self._stream(
                prompt=prompt,
                stop=stop,
                run_manager=run_manager,
                **kwargs,
            ):
                combined_text_output += chunk.text
            return combined_text_output
        else:
            params = self._get_parameters(stop)
            params = {**params, **kwargs}
            result = self.client(prompt=prompt, **params)
            return result["choices"][0]["text"]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yield result objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects, each containing a string token and metadata.
            See the llama-cpp-python docs and below for more.

        Example:
            .. code-block:: python

                from langchain_community.llms import LlamaCpp
                llm = LlamaCpp(
                    model_path="/path/to/local/model.bin",
                    temperature=0.5,
                )
                for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                        stop=["'", "\n"]):
                    result = chunk["choices"][0]
                    print(result["text"], end='', flush=True)  # noqa: T201
        """
        params = {**self._get_parameters(stop), **kwargs}
        result = self.client(prompt=prompt, stream=True, **params)
        for part in result:
            logprobs = part["choices"][0].get("logprobs", None)
            chunk = GenerationChunk(
                text=part["choices"][0]["text"],
                generation_info={"logprobs": logprobs},
            )
            if run_manager:
                run_manager.on_llm_new_token(
                    token=chunk.text, verbose=self.verbose, log_probs=logprobs
                )
            yield chunk

    def get_num_tokens(self, text: str) -> int:
        tokenized_text = self.client.tokenize(text.encode("utf-8"))
        return len(tokenized_text)
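
For orientation, a minimal usage sketch follows. It is not part of the module source
above: the model path is hypothetical and the sampling settings are arbitrary. It only
exercises the surface documented in this file, namely construction, invoke(), stream(),
and get_num_tokens(), and assumes llama-cpp-python is installed locally.

    from langchain_community.llms import LlamaCpp

    # Hypothetical local model file; any llama.cpp-compatible weights work here.
    llm = LlamaCpp(
        model_path="/path/to/local/llama/model.bin",
        temperature=0.5,
        max_tokens=128,
        n_ctx=2048,
    )

    # Non-streaming style: invoke() returns the full completion as a string.
    print(llm.invoke("Name three facts about llamas:"))

    # Streaming style: stream() yields text chunks as they are generated.
    for chunk in llm.stream("Write a haiku about llamas:", stop=["\n\n"]):
        print(chunk, end="", flush=True)

    # Token counting goes through the underlying llama.cpp tokenizer.
    print(llm.get_num_tokens("How many tokens is this sentence?"))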