Source code for langchain_community.llms.chatglm

import logging
from typing import Any, List, Mapping, Optional

import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

from langchain_community.llms.utils import enforce_stop_tokens

logger = logging.getLogger(__name__)


[docs]class ChatGLM(LLM):
    """ChatGLM LLM 服务。

    示例:
        .. code-block:: python

            from langchain_community.llms import ChatGLM
            endpoint_url = (
                "http://127.0.0.1:8000"
            )
            ChatGLM_llm = ChatGLM(
                endpoint_url=endpoint_url
            )"""

    endpoint_url: str = "http://127.0.0.1:8000/"
    """要使用的端点URL。"""
    model_kwargs: Optional[dict] = None
    """传递给模型的关键字参数。"""
    max_token: int = 20000
    """模型允许通过的最大令牌数。"""
    temperature: float = 0.1
    """LLM模型温度从0到10。"""
    history: List[List] = []
    """对话历史"""
    top_p: float = 0.7
    """从0到1中对核采样的前P个。"""
    with_history: bool = False
    """是否使用历史记录。"""

    @property
    def _llm_type(self) -> str:
        return "chat_glm"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """获取识别参数。"""
        _model_kwargs = self.model_kwargs or {}
        return {
            **{"endpoint_url": self.endpoint_url},
            **{"model_kwargs": _model_kwargs},
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """调用ChatGLM LLM推理端点。

参数：
    prompt: 传递给模型的提示。
    stop: 生成时可选的停止词列表。

返回：
    模型生成的字符串。

示例：
    .. code-block:: python

        response = chatglm_llm.invoke("Who are you?")
"""

        _model_kwargs = self.model_kwargs or {}

        # HTTP headers for authorization
        headers = {"Content-Type": "application/json"}

        payload = {
            "prompt": prompt,
            "temperature": self.temperature,
            "history": self.history,
            "max_length": self.max_token,
            "top_p": self.top_p,
        }
        payload.update(_model_kwargs)
        payload.update(kwargs)

        logger.debug(f"ChatGLM payload: {payload}")

        # call api
        try:
            response = requests.post(self.endpoint_url, headers=headers, json=payload)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error raised by inference endpoint: {e}")

        logger.debug(f"ChatGLM response: {response}")

        if response.status_code != 200:
            raise ValueError(f"Failed with response: {response}")

        try:
            parsed_response = response.json()

            # Check if response content does exists
            if isinstance(parsed_response, dict):
                content_keys = "response"
                if content_keys in parsed_response:
                    text = parsed_response[content_keys]
                else:
                    raise ValueError(f"No content in response : {parsed_response}")
            else:
                raise ValueError(f"Unexpected response type: {parsed_response}")

        except requests.exceptions.JSONDecodeError as e:
            raise ValueError(
                f"Error raised during decoding response from inference endpoint: {e}."
                f"\nResponse: {response.text}"
            )

        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        if self.with_history:
            self.history = parsed_response["history"]
        return text