Source code for langchain_community.llms.openllm

from __future__ import annotations

import copy
import json
import logging
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    List,
    Literal,
    Optional,
    TypedDict,
    Union,
    overload,
)

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import PrivateAttr

if TYPE_CHECKING:
    import openllm


ServerType = Literal["http", "grpc"]


class IdentifyingParams(TypedDict):
    """Parameters for identifying a model as a typed dict."""

    model_name: str
    model_id: Optional[str]
    server_url: Optional[str]
    server_type: Optional[ServerType]
    embedded: bool
    llm_kwargs: Dict[str, Any]

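# Illustrative sketch (not produced by this module): for a locally managed
# 'flan-t5' model, ``OpenLLM._identifying_params`` returns a dict shaped like
#
#     IdentifyingParams(
#         model_name="flan-t5",
#         model_id="google/flan-t5-large",
#         server_url=None,
#         server_type="http",
#         embedded=True,
#         llm_kwargs={...},  # generation configuration merged from the runner
#     )
#
# The concrete values depend on how the instance was constructed.
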
logger = logging.getLogger(__name__)

class OpenLLM(LLM):
    """OpenLLM, supporting both in-process model instances and remote
    OpenLLM servers.

    To use, you should have the openllm library installed:

    .. code-block:: bash

        pip install openllm

    Learn more at: https://github.com/bentoml/openllm

    Example running an LLM model locally managed by OpenLLM:
        .. code-block:: python

            from langchain_community.llms import OpenLLM
            llm = OpenLLM(
                model_name='flan-t5',
                model_id='google/flan-t5-large',
            )
            llm.invoke("What is the difference between a duck and a goose?")

    For all available supported models, you can run 'openllm models'.

    If you have a running OpenLLM server, you can also use it remotely:
        .. code-block:: python

            from langchain_community.llms import OpenLLM
            llm = OpenLLM(server_url='http://localhost:3000')
            llm.invoke("What is the difference between a duck and a goose?")
    """

    model_name: Optional[str] = None
    """Model name to use. See 'openllm models' for all available models."""
    model_id: Optional[str] = None
    """Model Id to use. If not provided, will use the default model for the
    model name. See 'openllm models' for all available model variants."""
    server_url: Optional[str] = None
    """Optional server URL of a currently running LLMServer started with
    'openllm start'."""
    timeout: int = 30
    """Timeout for the openllm client."""
    server_type: ServerType = "http"
    """Optional server type. Either 'http' or 'grpc'."""
    embedded: bool = True
    """Initialize this LLM instance in the current process by default. Should
    only be set to False when used in conjunction with a BentoML Service."""
    llm_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to openllm.LLM."""

    _runner: Optional[openllm.LLMRunner] = PrivateAttr(default=None)
    _client: Union[
        openllm.client.HTTPClient, openllm.client.GrpcClient, None
    ] = PrivateAttr(default=None)

    class Config:
        extra = "forbid"

    @overload
    def __init__(
        self,
        model_name: Optional[str] = ...,
        *,
        model_id: Optional[str] = ...,
        embedded: Literal[True, False] = ...,
        **llm_kwargs: Any,
    ) -> None:
        ...

    @overload
    def __init__(
        self,
        *,
        server_url: str = ...,
        server_type: Literal["grpc", "http"] = ...,
        **llm_kwargs: Any,
    ) -> None:
        ...

    def __init__(
        self,
        model_name: Optional[str] = None,
        *,
        model_id: Optional[str] = None,
        server_url: Optional[str] = None,
        timeout: int = 30,
        server_type: Literal["grpc", "http"] = "http",
        embedded: bool = True,
        **llm_kwargs: Any,
    ):
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm.'"
            ) from e

        llm_kwargs = llm_kwargs or {}

        if server_url is not None:
            logger.debug("'server_url' is provided, returning a openllm.Client")
            assert (
                model_id is None and model_name is None
            ), "'server_url' and {'model_id', 'model_name'} are mutually exclusive"
            client_cls = (
                openllm.client.HTTPClient
                if server_type == "http"
                else openllm.client.GrpcClient
            )
            client = client_cls(server_url, timeout)

            super().__init__(
                **{  # type: ignore[arg-type]
                    "server_url": server_url,
                    "timeout": timeout,
                    "server_type": server_type,
                    "llm_kwargs": llm_kwargs,
                }
            )
            self._runner = None  # type: ignore
            self._client = client
        else:
            assert model_name is not None, "Must provide 'model_name' or 'server_url'"
            # since the LLM are relatively huge, we don't actually want to convert the
            # Runner with embedded when running the server. Instead, we will only set
            # the init_local here so that LangChain users can still use the LLM
            # in-process. Wrt to BentoML users, setting embedded=False is the expected
            # behaviour to invoke the runners remotely.
            # We need to also enable ensure_available to download and setup the model.
            runner = openllm.Runner(
                model_name=model_name,
                model_id=model_id,
                init_local=embedded,
                ensure_available=True,
                **llm_kwargs,
            )
            super().__init__(
                **{  # type: ignore[arg-type]
                    "model_name": model_name,
                    "model_id": model_id,
                    "embedded": embedded,
                    "llm_kwargs": llm_kwargs,
                }
            )
            self._client = None  # type: ignore
            self._runner = runner

    @property
    def runner(self) -> openllm.LLMRunner:
        """Get the underlying openllm.LLMRunner instance for integration with BentoML.

        Example:
        .. code-block:: python

            llm = OpenLLM(
                model_name='flan-t5',
                model_id='google/flan-t5-large',
                embedded=False,
            )
            tools = load_tools(["serpapi", "llm-math"], llm=llm)
            agent = initialize_agent(
                tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
            )
            svc = bentoml.Service("langchain-openllm", runners=[llm.runner])

            @svc.api(input=Text(), output=Text())
            def chat(input_text: str):
                return agent.run(input_text)
        """
        if self._runner is None:
            raise ValueError("OpenLLM must be initialized locally with 'model_name'")
        return self._runner

    @property
    def _identifying_params(self) -> IdentifyingParams:
        """Get the identifying parameters."""
        if self._client is not None:
            self.llm_kwargs.update(self._client._config)
            model_name = self._client._metadata.model_dump()["model_name"]
            model_id = self._client._metadata.model_dump()["model_id"]
        else:
            if self._runner is None:
                raise ValueError("Runner must be initialized.")
            model_name = self.model_name
            model_id = self.model_id
            try:
                self.llm_kwargs.update(
                    json.loads(self._runner.identifying_params["configuration"])
                )
            except (TypeError, json.JSONDecodeError):
                pass
        return IdentifyingParams(
            server_url=self.server_url,
            server_type=self.server_type,
            embedded=self.embedded,
            llm_kwargs=self.llm_kwargs,
            model_name=model_name,
            model_id=model_id,
        )

    @property
    def _llm_type(self) -> str:
        return "openllm_client" if self._client else "openllm"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm'."
            ) from e
        copied = copy.deepcopy(self.llm_kwargs)
        copied.update(kwargs)
        config = openllm.AutoConfig.for_model(
            self._identifying_params["model_name"], **copied
        )
        if self._client:
            res = (
                self._client.generate(prompt, **config.model_dump(flatten=True))
                .outputs[0]
                .text
            )
        else:
            assert self._runner is not None
            res = self._runner(prompt, **config.model_dump(flatten=True))
        if isinstance(res, dict) and "text" in res:
            return res["text"]
        elif isinstance(res, str):
            return res
        else:
            raise ValueError(
                "Expected result to be a dict with key 'text' or a string. "
                f"Received {res}"
            )

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        try:
            import openllm
        except ImportError as e:
            raise ImportError(
                "Could not import openllm. Make sure to install it with "
                "'pip install openllm'."
            ) from e
        copied = copy.deepcopy(self.llm_kwargs)
        copied.update(kwargs)
        config = openllm.AutoConfig.for_model(
            self._identifying_params["model_name"], **copied
        )
        if self._client:
            async_client = openllm.client.AsyncHTTPClient(
                self.server_url, self.timeout
            )
            res = (
                (await async_client.generate(prompt, **config.model_dump(flatten=True)))
                .outputs[0]
                .text
            )
        else:
            assert self._runner is not None
            (
                prompt,
                generate_kwargs,
                postprocess_kwargs,
            ) = self._runner.llm.sanitize_parameters(prompt, **kwargs)
            generated_result = await self._runner.generate.async_run(
                prompt, **generate_kwargs
            )
            res = self._runner.llm.postprocess_generate(
                prompt, generated_result, **postprocess_kwargs
            )
        if isinstance(res, dict) and "text" in res:
            return res["text"]
        elif isinstance(res, str):
            return res
        else:
            raise ValueError(
                "Expected result to be a dict with key 'text' or a string. "
                f"Received {res}"
            )
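

# --- Usage sketch (not part of the module above) ------------------------------
# A minimal sketch of how keyword arguments reach the model: ``llm_kwargs``
# captured at construction time and the kwargs passed to ``invoke`` are merged
# in ``_call`` (``copied.update(kwargs)``) before being handed to
# ``openllm.AutoConfig.for_model``. The generation parameters shown below
# (``temperature``, ``max_new_tokens``) are assumptions about the chosen
# model's configuration and may differ for your model.
#
#     from langchain_community.llms import OpenLLM
#
#     # In-process model managed by OpenLLM; extra kwargs land in llm_kwargs.
#     llm = OpenLLM(
#         model_name="flan-t5",
#         model_id="google/flan-t5-large",
#         temperature=0.2,
#     )
#
#     # Per-call kwargs are merged on top of llm_kwargs for this call only.
#     llm.invoke(
#         "What is the difference between a duck and a goose?",
#         max_new_tokens=64,
#     )
#
#     # Remote OpenLLM server started with `openllm start`.
#     remote_llm = OpenLLM(server_url="http://localhost:3000", server_type="http")
#     remote_llm.invoke("Summarize the plot of Hamlet in one sentence.")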