Source code for langchain_community.llms.friendli

from __future__ import annotations

import os
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional

from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.load.serializable import Serializable
from langchain_core.outputs import GenerationChunk, LLMResult
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils.env import get_from_dict_or_env
from langchain_core.utils.utils import convert_to_secret_str


def _stream_response_to_generation_chunk(stream_response: Any) -> GenerationChunk:
    """将流响应转换为生成块。"""
    if stream_response.event == "token_sampled":
        return GenerationChunk(
            text=stream_response.text,
            generation_info={"token": str(stream_response.token)},
        )
    return GenerationChunk(text="")


class BaseFriendli(Serializable):
    """Base class of Friendli."""

    # Friendli client.
    client: Any = Field(default=None, exclude=True)
    # Friendli Async client.
    async_client: Any = Field(default=None, exclude=True)
    # Model name to use.
    model: str = "mixtral-8x7b-instruct-v0-1"
    # Friendli personal access token to run as.
    friendli_token: Optional[SecretStr] = None
    # Friendli team ID to run as.
    friendli_team: Optional[str] = None
    # Whether to enable streaming mode.
    streaming: bool = False
    # Number between -2.0 and 2.0. Positive values penalize tokens that have been
    # sampled, taking into account their frequency in the preceding text. This
    # penalization diminishes the model's tendency to reproduce identical lines
    # verbatim.
    frequency_penalty: Optional[float] = None
    # Number between -2.0 and 2.0. Positive values penalize tokens that have been
    # sampled at least once in the existing text.
    presence_penalty: Optional[float] = None
    # The maximum number of tokens to generate. The length of your input tokens plus
    # `max_tokens` should not exceed the model's maximum length (e.g., 2048 for OpenAI
    # GPT-3).
    max_tokens: Optional[int] = None
    # When one of the stop phrases appears in the generation result, the API will stop
    # generation. The phrase is included in the generated result. If you are using
    # beam search, all of the active beams should contain the stop phrase to terminate
    # generation. Before checking whether a stop phrase is included in the result, the
    # phrase is converted into tokens.
    stop: Optional[List[str]] = None
    # Sampling temperature. Smaller temperature makes the generation result closer to
    # greedy, argmax (i.e., `top_k = 1`) sampling. If it is `None`, then 1.0 is used.
    temperature: Optional[float] = None
    # Tokens comprising the top `top_p` probability mass are kept for sampling. Numbers
    # between 0.0 (exclusive) and 1.0 (inclusive) are allowed. If it is `None`, then
    # 1.0 is used by default.
    top_p: Optional[float] = None

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that a personal access token is provided in the environment."""
        try:
            import friendli
        except ImportError as e:
            raise ImportError(
                "Could not import friendli-client python package. "
                "Please install it with `pip install friendli-client`."
            ) from e

        friendli_token = convert_to_secret_str(
            get_from_dict_or_env(values, "friendli_token", "FRIENDLI_TOKEN")
        )
        values["friendli_token"] = friendli_token
        friendli_token_str = friendli_token.get_secret_value()
        friendli_team = values["friendli_team"] or os.getenv("FRIENDLI_TEAM")
        values["friendli_team"] = friendli_team
        values["client"] = values["client"] or friendli.Friendli(
            token=friendli_token_str, team_id=friendli_team
        )
        values["async_client"] = values["async_client"] or friendli.AsyncFriendli(
            token=friendli_token_str, team_id=friendli_team
        )
        return values
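
# Configuration sketch (an assumption, not part of the original module): the root
# validator above resolves credentials either from the constructor arguments or
# from the FRIENDLI_TOKEN / FRIENDLI_TEAM environment variables, then builds the
# synchronous and asynchronous friendli clients, e.g.:
#
#     import os
#
#     os.environ["FRIENDLI_TOKEN"] = "<your personal access token>"  # placeholder
#     os.environ["FRIENDLI_TEAM"] = "<your team id>"  # optional; placeholder
#     llm = Friendli()  # `Friendli` (defined below) inherits this validator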

class Friendli(LLM, BaseFriendli):
    """Friendli LLM.

    The ``friendli-client`` package should be installed with
    `pip install friendli-client`. You must set the ``FRIENDLI_TOKEN`` environment
    variable or provide the value of your personal access token for the
    ``friendli_token`` argument.

    Example:
        .. code-block:: python

            from langchain_community.llms import Friendli

            friendli = Friendli(
                model="mixtral-8x7b-instruct-v0-1", friendli_token="YOUR FRIENDLI TOKEN"
            )
    """

    @property
    def lc_secrets(self) -> Dict[str, str]:
        return {"friendli_token": "FRIENDLI_TOKEN"}

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling Friendli completions API."""
        return {
            "frequency_penalty": self.frequency_penalty,
            "presence_penalty": self.presence_penalty,
            "max_tokens": self.max_tokens,
            "stop": self.stop,
            "temperature": self.temperature,
            "top_p": self.top_p,
        }

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {"model": self.model, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "friendli"

    def _get_invocation_params(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        """Get the parameters used to invoke the model."""
        params = self._default_params
        if self.stop is not None and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")
        elif self.stop is not None:
            params["stop"] = self.stop
        else:
            params["stop"] = stop
        return {**params, **kwargs}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out Friendli's completions API.

        Args:
            prompt (str): The text prompt to generate completion for.
            stop (Optional[List[str]], optional): When one of the stop phrases appears
                in the generation result, the API will stop generation. The stop
                phrases are excluded from the result. If beam search is enabled, all
                of the active beams should contain the stop phrase to terminate
                generation. Before checking whether a stop phrase is included in the
                result, the phrase is converted into tokens. We recommend using
                stop_tokens because it is clearer. For example, after tokenization,
                the phrases "clear" and " clear" can result in different token
                sequences due to the prepended space character. Defaults to None.

        Returns:
            str: The generated text output.

        Example:
            .. code-block:: python

                response = friendli("Give me a recipe for the Old Fashioned cocktail.")
        """
        params = self._get_invocation_params(stop=stop, **kwargs)
        completion = self.client.completions.create(
            model=self.model, prompt=prompt, stream=False, **params
        )
        return completion.choices[0].text

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out Friendli's completions API asynchronously.

        Args:
            prompt (str): The text prompt to generate completion for.
            stop (Optional[List[str]], optional): When one of the stop phrases appears
                in the generation result, the API will stop generation. The stop
                phrases are excluded from the result. If beam search is enabled, all
                of the active beams should contain the stop phrase to terminate
                generation. Before checking whether a stop phrase is included in the
                result, the phrase is converted into tokens. We recommend using
                stop_tokens because it is clearer. For example, after tokenization,
                the phrases "clear" and " clear" can result in different token
                sequences due to the prepended space character. Defaults to None.

        Returns:
            str: The generated text output.

        Example:
            .. code-block:: python

                response = await friendli("Tell me a joke.")
        """
        params = self._get_invocation_params(stop=stop, **kwargs)
        completion = await self.async_client.completions.create(
            model=self.model, prompt=prompt, stream=False, **params
        )
        return completion.choices[0].text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        params = self._get_invocation_params(stop=stop, **kwargs)
        stream = self.client.completions.create(
            model=self.model, prompt=prompt, stream=True, **params
        )
        for line in stream:
            chunk = _stream_response_to_generation_chunk(line)
            yield chunk
            if run_manager:
                run_manager.on_llm_new_token(line.text, chunk=chunk)

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        params = self._get_invocation_params(stop=stop, **kwargs)
        stream = await self.async_client.completions.create(
            model=self.model, prompt=prompt, stream=True, **params
        )
        async for line in stream:
            chunk = _stream_response_to_generation_chunk(line)
            yield chunk
            if run_manager:
                await run_manager.on_llm_new_token(line.text, chunk=chunk)

    def _generate(
        self,
        prompts: list[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Call out Friendli's completions API with k unique prompts.

        Args:
            prompts (list[str]): The text prompts to generate completions for.
            stop (Optional[List[str]], optional): When one of the stop phrases appears
                in the generation result, the API will stop generation. The stop
                phrases are excluded from the result. If beam search is enabled, all
                of the active beams should contain the stop phrase to terminate
                generation. Before checking whether a stop phrase is included in the
                result, the phrase is converted into tokens. We recommend using
                stop_tokens because it is clearer. For example, after tokenization,
                the phrases "clear" and " clear" can result in different token
                sequences due to the prepended space character. Defaults to None.

        Returns:
            LLMResult: The generation result.

        Example:
            .. code-block:: python

                response = friendli.generate(["Tell me a joke."])
        """
        llm_output = {"model": self.model}
        if self.streaming:
            if len(prompts) > 1:
                raise ValueError("Cannot stream results with multiple prompts.")

            generation: Optional[GenerationChunk] = None
            for chunk in self._stream(prompts[0], stop, run_manager, **kwargs):
                if generation is None:
                    generation = chunk
                else:
                    generation += chunk
            assert generation is not None
            return LLMResult(generations=[[generation]], llm_output=llm_output)

        llm_result = super()._generate(prompts, stop, run_manager, **kwargs)
        llm_result.llm_output = llm_output
        return llm_result

    async def _agenerate(
        self,
        prompts: list[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Call out Friendli's completions API asynchronously with k unique prompts.

        Args:
            prompts (list[str]): The text prompts to generate completions for.
            stop (Optional[List[str]], optional): When one of the stop phrases appears
                in the generation result, the API will stop generation. The stop
                phrases are excluded from the result. If beam search is enabled, all
                of the active beams should contain the stop phrase to terminate
                generation. Before checking whether a stop phrase is included in the
                result, the phrase is converted into tokens. We recommend using
                stop_tokens because it is clearer. For example, after tokenization,
                the phrases "clear" and " clear" can result in different token
                sequences due to the prepended space character. Defaults to None.

        Returns:
            LLMResult: The generation result.

        Example:
            .. code-block:: python

                response = await friendli.agenerate(
                    ["Give me a recipe for the Old Fashioned cocktail."]
                )
        """
        llm_output = {"model": self.model}
        if self.streaming:
            if len(prompts) > 1:
                raise ValueError("Cannot stream results with multiple prompts.")

            generation = None
            async for chunk in self._astream(prompts[0], stop, run_manager, **kwargs):
                if generation is None:
                    generation = chunk
                else:
                    generation += chunk
            assert generation is not None
            return LLMResult(generations=[[generation]], llm_output=llm_output)

        llm_result = await super()._agenerate(prompts, stop, run_manager, **kwargs)
        llm_result.llm_output = llm_output
        return llm_result
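
A brief usage sketch, not part of the module above. It assumes the friendli-client
package is installed and a valid personal access token is available in the
FRIENDLI_TOKEN environment variable; it uses the standard LangChain invoke/stream
interface inherited by the class.

    from langchain_community.llms import Friendli

    # Assumes FRIENDLI_TOKEN is set in the environment.
    llm = Friendli(model="mixtral-8x7b-instruct-v0-1")

    # Single completion via the synchronous _call path.
    print(llm.invoke("Tell me a joke."))

    # Token-by-token streaming via the _stream path.
    for chunk in llm.stream("Tell me a joke."):
        print(chunk, end="", flush=True)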