import logging
from typing import Any, Dict, List, Optional
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import get_from_dict_or_env
logger = logging.getLogger(__name__)
DEFAULT_TIME_OUT = 300
DEFAULT_CONTENT_TYPE_JSON = "application/json"
class OCIModelDeploymentLLM(LLM):
    """Base class for LLMs deployed on OCI Data Science Model Deployment."""

    auth: dict = Field(default_factory=dict, exclude=True)
    """ADS auth dictionary for OCI authentication:
    https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html.
    This can be generated by calling `ads.common.auth.api_keys()` or
    `ads.common.auth.resource_principal()`. If this is not provided,
    then `ads.common.default_signer()` will be used."""

    max_tokens: int = 256
    """Denotes the number of tokens to predict per generation."""

    temperature: float = 0.2
    """A non-negative float that tunes the degree of randomness in generation."""

    k: int = 0
    """Number of most likely tokens to consider at each step."""

    p: float = 0.75
    """Total probability mass of tokens to consider at each step."""

    endpoint: str = ""
    """The URI of the endpoint of the deployed Model Deployment model."""

    best_of: int = 1
    """Generates best_of completions server-side and returns the "best"
    (the one with the highest log probability per token)."""

    stop: Optional[List[str]] = None
    """Stop words to use when generating. Model output is cut off
    at the first occurrence of any of these substrings."""

    @root_validator()
    def validate_environment(  # pylint: disable=no-self-argument
        cls, values: Dict
    ) -> Dict:
        """Validate that the python package exists in the environment
        and resolve ``auth`` and ``endpoint``.

        Raises:
            ImportError: If `oracle_ads` is not installed.
        """
        try:
            import ads
        except ImportError as ex:
            raise ImportError(
                "Could not import ads python package. "
                "Please install it with `pip install oracle_ads`."
            ) from ex
        # Fall back to the default ADS signer when no auth dict is supplied.
        if not values.get("auth", None):
            values["auth"] = ads.common.auth.default_signer()
        # Endpoint may come from the constructor or the OCI_LLM_ENDPOINT env var.
        values["endpoint"] = get_from_dict_or_env(
            values,
            "endpoint",
            "OCI_LLM_ENDPOINT",
        )
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Default parameters for the model. Subclasses must override."""
        raise NotImplementedError

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {
            **{"endpoint": self.endpoint},
            **self._default_params,
        }

    def _construct_json_body(self, prompt: str, params: dict) -> dict:
        """Construct the request body as a dict (JSON). Subclasses must override."""
        raise NotImplementedError

    def _invocation_params(self, stop: Optional[List[str]], **kwargs: Any) -> dict:
        """Combine the invocation parameters with the default parameters.

        Raises:
            ValueError: If ``stop`` is set both on the instance and per-call.
        """
        params = self._default_params
        if self.stop is not None and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")
        elif self.stop is not None:
            params["stop"] = self.stop
        elif stop is not None:
            params["stop"] = stop
        else:
            # Don't set "stop" in param as None. It should be a list.
            params["stop"] = []
        return {**params, **kwargs}

    def _process_response(self, response_json: dict) -> str:
        """Extract the completion text from the endpoint's JSON response.
        Subclasses must override."""
        raise NotImplementedError

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to an OCI Data Science Model Deployment endpoint.

        Args:
            prompt (str):
                The prompt to pass into the model.
            stop (List[str], Optional):
                List of stop words to use when generating.
            kwargs:
                requests_kwargs:
                    Additional ``**kwargs`` to pass to requests.post

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                response = oci_md("Tell me a joke.")
        """
        requests_kwargs = kwargs.pop("requests_kwargs", {})
        params = self._invocation_params(stop, **kwargs)
        body = self._construct_json_body(prompt, params)
        # Lazy %-style args avoid building the message when INFO is disabled.
        logger.info("LLM API Request:\n%s", prompt)
        response = self._send_request(
            data=body, endpoint=self.endpoint, **requests_kwargs
        )
        completion = self._process_response(response)
        logger.info("LLM API Completion:\n%s", completion)
        return completion

    def _send_request(
        self,
        data: Any,
        endpoint: str,
        header: Optional[dict] = None,
        **kwargs: Any,
    ) -> Dict:
        """Send a request to the OCI Data Science Model Deployment endpoint.

        Args:
            data (JSON serializable):
                Data that needs to be sent to the endpoint.
            endpoint (str):
                The model HTTP endpoint.
            header (dict, optional):
                A dictionary of HTTP headers to send to the specified url.
                Defaults to None, which is treated as an empty dict.
            kwargs:
                Additional ``**kwargs`` to pass to requests.post.

        Raises:
            Exception:
                Raised when the invocation fails.

        Returns:
            A JSON representation of the requests.Response object.
        """
        # `header` defaults to None (not a mutable `{}`) so the default is
        # never shared between calls; a falsy value becomes a fresh dict.
        if not header:
            header = {}
        header["Content-Type"] = (
            header.pop("content_type", DEFAULT_CONTENT_TYPE_JSON)
            or DEFAULT_CONTENT_TYPE_JSON
        )
        request_kwargs = {"json": data}
        request_kwargs["headers"] = header
        timeout = kwargs.pop("timeout", DEFAULT_TIME_OUT)
        attempts = 0
        # Retry once on HTTP 401 after refreshing the security token.
        while attempts < 2:
            request_kwargs["auth"] = self.auth.get("signer")
            response = requests.post(
                endpoint, timeout=timeout, **request_kwargs, **kwargs
            )
            if response.status_code == 401:
                self._refresh_signer()
                attempts += 1
                continue
            break

        try:
            response.raise_for_status()
            response_json = response.json()
        except Exception:
            # Log request details before re-raising to aid debugging.
            logger.error(
                "DEBUG INFO: request_kwargs=%s, status_code=%s, content=%s",
                request_kwargs,
                response.status_code,
                response.content,
            )
            raise

        return response_json

    def _refresh_signer(self) -> None:
        """Refresh the security token on the signer, when it supports it."""
        if self.auth.get("signer", None) and hasattr(
            self.auth["signer"], "refresh_security_token"
        ):
            self.auth["signer"].refresh_security_token()
class OCIModelDeploymentTGI(OCIModelDeploymentLLM):
    """OCI Data Science Model Deployment TGI Endpoint.

    To use, you must provide the model HTTP endpoint from your deployed
    model, e.g. https://<MD_OCID>/predict.

    To authenticate, `oracle-ads` is used to automatically load
    credentials: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html

    Make sure to have the required policies to access the OCI Data
    Science Model Deployment endpoint. See:
    https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm#model_dep_policies_auth__predict-endpoint

    Example:
        .. code-block:: python

            from langchain_community.llms import ModelDeploymentTGI

            oci_md = ModelDeploymentTGI(endpoint="https://<MD_OCID>/predict")
    """

    do_sample: bool = True
    """If set to True, this parameter enables decoding strategies such as
    multinomial sampling, beam-search multinomial sampling, Top-K sampling
    and Top-p sampling."""

    watermark: bool = True
    """Watermarking with `A Watermark for Large Language Models
    <https://arxiv.org/abs/2301.10226>`_. Defaults to True."""

    return_full_text: bool = False
    """Whether to prepend the prompt to the generated text. Defaults to False."""

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "oci_model_deployment_tgi_endpoint"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for invoking an OCI model deployment
        TGI endpoint."""
        return {
            "best_of": self.best_of,
            "max_new_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_k": self.k
            if self.k > 0
            else None,  # `top_k` must be strictly positive'
            "top_p": self.p,
            "do_sample": self.do_sample,
            "return_full_text": self.return_full_text,
            "watermark": self.watermark,
        }

    def _construct_json_body(self, prompt: str, params: dict) -> dict:
        """Build the TGI request payload: the prompt goes under ``inputs``
        and the generation options under ``parameters``."""
        return {
            "inputs": prompt,
            "parameters": params,
        }

    def _process_response(self, response_json: dict) -> str:
        """Return ``generated_text`` from the response (falling back to the
        whole payload when absent), with a trailing newline appended."""
        return str(response_json.get("generated_text", response_json)) + "\n"
class OCIModelDeploymentVLLM(OCIModelDeploymentLLM):
    """VLLM deployed on OCI Data Science Model Deployment.

    To use, you must provide the model HTTP endpoint from your deployed
    model, e.g. https://<MD_OCID>/predict.

    To authenticate, `oracle-ads` is used to automatically load
    credentials: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html

    Make sure to have the required policies to access the OCI Data
    Science Model Deployment endpoint. See:
    https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm#model_dep_policies_auth__predict-endpoint

    Example:
        .. code-block:: python

            from langchain_community.llms import OCIModelDeploymentVLLM

            oci_md = OCIModelDeploymentVLLM(
                endpoint="https://<MD_OCID>/predict",
                model="mymodel"
            )
    """

    model: str
    """The name of the model."""

    n: int = 1
    """Number of output sequences to return for the given prompt."""

    k: int = -1
    """Number of most likely tokens to consider at each step."""

    frequency_penalty: float = 0.0
    """Penalizes repeated tokens according to frequency. Between 0 and 1."""

    presence_penalty: float = 0.0
    """Penalizes repeated tokens. Between 0 and 1."""

    use_beam_search: bool = False
    """Whether to use beam search instead of sampling."""

    ignore_eos: bool = False
    """Whether to ignore the EOS token and continue generating tokens after
    the EOS token is generated."""

    logprobs: Optional[int] = None
    """Number of log probabilities to return per output token."""

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "oci_model_deployment_vllm_endpoint"

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling vllm."""
        return {
            "best_of": self.best_of,
            "frequency_penalty": self.frequency_penalty,
            "ignore_eos": self.ignore_eos,
            "logprobs": self.logprobs,
            "max_tokens": self.max_tokens,
            "model": self.model,
            "n": self.n,
            "presence_penalty": self.presence_penalty,
            "stop": self.stop,
            "temperature": self.temperature,
            "top_k": self.k,
            "top_p": self.p,
            "use_beam_search": self.use_beam_search,
        }

    def _construct_json_body(self, prompt: str, params: dict) -> dict:
        """Build the vLLM request payload: the prompt plus all sampling
        parameters flattened into the top-level object."""
        return {
            "prompt": prompt,
            **params,
        }

    def _process_response(self, response_json: dict) -> str:
        """Return the text of the first choice from an OpenAI-style
        completion response.

        Raises:
            KeyError: If the response has no ``choices`` entry.
        """
        return response_json["choices"][0]["text"]