"""加载数据集和评估器。"""
from typing import Any, Dict, List, Optional, Sequence, Type, Union
from langchain_core.language_models import BaseLanguageModel
from langchain.chains.base import Chain
from langchain.evaluation.agents.trajectory_eval_chain import TrajectoryEvalChain
from langchain.evaluation.comparison import PairwiseStringEvalChain
from langchain.evaluation.comparison.eval_chain import LabeledPairwiseStringEvalChain
from langchain.evaluation.criteria.eval_chain import (
CriteriaEvalChain,
LabeledCriteriaEvalChain,
)
from langchain.evaluation.embedding_distance.base import (
EmbeddingDistanceEvalChain,
PairwiseEmbeddingDistanceEvalChain,
)
from langchain.evaluation.exact_match.base import ExactMatchStringEvaluator
from langchain.evaluation.parsing.base import (
JsonEqualityEvaluator,
JsonValidityEvaluator,
)
from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator
from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator
from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator
from langchain.evaluation.scoring.eval_chain import (
LabeledScoreStringEvalChain,
ScoreStringEvalChain,
)
from langchain.evaluation.string_distance.base import (
PairwiseStringDistanceEvalChain,
StringDistanceEvalChain,
)
def load_dataset(uri: str) -> List[Dict]:
    """Load a dataset from the `LangChainDatasets on HuggingFace <https://huggingface.co/LangChainDatasets>`_.

    Args:
        uri: The uri of the dataset to load. It is appended to the
            ``LangChainDatasets/`` namespace before being handed to
            ``datasets.load_dataset``.

    Returns:
        A list of dictionaries, each representing a row of the dataset's
        ``"train"`` split.

    Raises:
        ImportError: If the ``datasets`` package cannot be imported.

    **Prerequisites**

    .. code-block:: shell

        pip install datasets

    Examples
    --------
    .. code-block:: python

        from langchain.evaluation import load_dataset
        ds = load_dataset("llm-math")
    """  # noqa: E501
    try:
        # Imported lazily so `datasets` stays an optional dependency; aliased
        # so the import does not shadow this function's own name.
        from datasets import load_dataset as _hf_load_dataset
    except ImportError as e:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "load_dataset requires the `datasets` package."
            " Please install with `pip install datasets`"
        ) from e
    dataset = _hf_load_dataset(f"LangChainDatasets/{uri}")
    return list(dataset["train"])
# Registry mapping each supported ``EvaluatorType`` member to the class that
# ``load_evaluator`` instantiates for it.  Entry order is user-visible: the
# keys are listed, in this order, in the "Unknown evaluator type" error.
_EVALUATOR_MAP: Dict[
    EvaluatorType, Union[Type[LLMEvalChain], Type[Chain], Type[StringEvaluator]]
] = {
    EvaluatorType.QA: QAEvalChain,
    EvaluatorType.COT_QA: CotQAEvalChain,
    EvaluatorType.CONTEXT_QA: ContextQAEvalChain,
    EvaluatorType.PAIRWISE_STRING: PairwiseStringEvalChain,
    EvaluatorType.SCORE_STRING: ScoreStringEvalChain,
    EvaluatorType.LABELED_PAIRWISE_STRING: LabeledPairwiseStringEvalChain,
    EvaluatorType.LABELED_SCORE_STRING: LabeledScoreStringEvalChain,
    EvaluatorType.AGENT_TRAJECTORY: TrajectoryEvalChain,
    EvaluatorType.CRITERIA: CriteriaEvalChain,
    EvaluatorType.LABELED_CRITERIA: LabeledCriteriaEvalChain,
    EvaluatorType.STRING_DISTANCE: StringDistanceEvalChain,
    EvaluatorType.PAIRWISE_STRING_DISTANCE: PairwiseStringDistanceEvalChain,
    EvaluatorType.EMBEDDING_DISTANCE: EmbeddingDistanceEvalChain,
    EvaluatorType.PAIRWISE_EMBEDDING_DISTANCE: PairwiseEmbeddingDistanceEvalChain,
    EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator,
    EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator,
    EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator,
    EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator,
    EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator,
    EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator,
}
def load_evaluator(
    evaluator: EvaluatorType,
    *,
    llm: Optional[BaseLanguageModel] = None,
    **kwargs: Any,
) -> Union[Chain, StringEvaluator]:
    """Load the requested evaluation chain.

    Parameters
    ----------
    evaluator : EvaluatorType
        The type of evaluator to load.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation, by default None. It is only
        consulted for evaluator classes that subclass ``LLMEvalChain``; when
        omitted for such a class, a ``ChatOpenAI`` "gpt-4" model with
        ``temperature=0`` and ``seed=42`` is created.
    **kwargs : Any
        Additional keyword arguments to pass to the evaluator.

    Returns
    -------
    Chain or StringEvaluator
        The loaded evaluator.

    Raises
    ------
    ValueError
        If ``evaluator`` is not a key of ``_EVALUATOR_MAP``, or if no ``llm``
        was supplied and the default model could not be created.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluator, EvaluatorType
    >>> evaluator = load_evaluator(EvaluatorType.QA)
    """
    if evaluator not in _EVALUATOR_MAP:
        raise ValueError(
            f"Unknown evaluator type: {evaluator}"
            f"\nValid types are: {list(_EVALUATOR_MAP.keys())}"
        )
    evaluator_cls = _EVALUATOR_MAP[evaluator]

    # Evaluators that are not LLM-backed are built straight from kwargs.
    if not issubclass(evaluator_cls, LLMEvalChain):
        return evaluator_cls(**kwargs)

    # Only touch the OpenAI integrations when a default model is actually
    # needed; a caller-supplied llm must work without them installed.
    if llm is None:
        try:
            try:
                from langchain_openai import ChatOpenAI
            except ImportError:
                try:
                    from langchain_community.chat_models.openai import ChatOpenAI
                except ImportError as import_err:
                    raise ImportError(
                        "Could not import langchain_openai or fallback onto "
                        "langchain_community. Please install langchain_openai "
                        "or specify a language model explicitly. "
                        "It's recommended to install langchain_openai AND "
                        "specify a language model explicitly."
                    ) from import_err
            llm = ChatOpenAI(  # type: ignore[call-arg]
                model="gpt-4", model_kwargs={"seed": 42}, temperature=0
            )
        except Exception as e:
            # Import and construction failures (e.g. missing credentials)
            # both surface as one actionable ValueError.
            raise ValueError(
                f"Evaluation with the {evaluator_cls} requires a "
                "language model to function."
                " Failed to create the default 'gpt-4' model."
                " Please manually provide an evaluation LLM"
                " or check your openai credentials."
            ) from e
    return evaluator_cls.from_llm(llm=llm, **kwargs)
def load_evaluators(
    evaluators: Sequence[EvaluatorType],
    *,
    llm: Optional[BaseLanguageModel] = None,
    config: Optional[dict] = None,
    **kwargs: Any,
) -> List[Union[Chain, StringEvaluator]]:
    """Load every evaluator named in a sequence of evaluator types.

    Parameters
    ----------
    evaluators : Sequence[EvaluatorType]
        The evaluator types to load, in order.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation. If none is provided, a
        default ChatOpenAI gpt-4 model will be used.
    config : dict, optional
        A dictionary mapping evaluator types to additional keyword arguments,
        by default None. These take precedence over ``**kwargs`` on key
        clashes.
    **kwargs : Any
        Additional keyword arguments passed to all evaluators.

    Returns
    -------
    List[Union[Chain, StringEvaluator]]
        The loaded evaluators, in the same order as ``evaluators``.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluators, EvaluatorType
    >>> evaluators = [EvaluatorType.QA, EvaluatorType.CRITERIA]
    >>> loaded_evaluators = load_evaluators(evaluators, criteria="helpfulness")
    """
    # A missing or empty config means no per-type overrides.
    overrides = config or {}
    return [
        load_evaluator(
            evaluator_type,
            llm=llm,
            # Shared kwargs first, so per-type entries win on duplicates.
            **{**kwargs, **overrides.get(evaluator_type, {})},
        )
        for evaluator_type in evaluators
    ]