Integrations

ragas.integrations.langchain

EvaluatorChain

EvaluatorChain(metric: Metric, **kwargs: Any)

Bases: Chain, RunEvaluator

Wrapper around ragas Metrics to use them with langsmith.

Source code in src/ragas/integrations/langchain.py
def __init__(self, metric: Metric, **kwargs: t.Any):
    kwargs["metric"] = metric
    super().__init__(**kwargs)
    if "run_config" in kwargs:
        run_config = kwargs["run_config"]
    else:
        run_config = RunConfig()
    if isinstance(self.metric, MetricWithLLM):
        llm = get_or_init(kwargs, "llm", ChatOpenAI)
        t.cast(MetricWithLLM, self.metric).llm = LangchainLLMWrapper(llm)
    if isinstance(self.metric, MetricWithEmbeddings):
        embeddings = get_or_init(kwargs, "embeddings", OpenAIEmbeddings)
        t.cast(MetricWithEmbeddings, self.metric).embeddings = (
            LangchainEmbeddingsWrapper(embeddings)
        )
    self.metric.init(run_config)

    assert isinstance(
        self.metric, SingleTurnMetric
    ), "Metric must be SingleTurnMetric"

evaluate_run

evaluate_run(
    run: Run, example: Optional[Example] = None
) -> EvaluationResult

Evaluate a langsmith run

Source code in src/ragas/integrations/langchain.py
def evaluate_run(
    self, run: Run, example: t.Optional[Example] = None
) -> EvaluationResult:
    """
    Evaluate a langsmith run
    """
    self._validate_langsmith_eval(run, example)

    # this is just to suppress the type checker error
    # actual check and error message is in the _validate_langsmith_eval
    assert run.outputs is not None
    assert example is not None
    assert example.inputs is not None
    assert example.outputs is not None

    chain_eval = run.outputs
    chain_eval["question"] = example.inputs["question"]
    if "ground_truth" in get_required_columns_v1(self.metric):
        if example.outputs is None or "ground_truth" not in example.outputs:
            raise ValueError("expected `ground_truth` in example outputs.")
        chain_eval["ground_truth"] = example.outputs["ground_truth"]
    eval_output = self.invoke(chain_eval, include_run_info=True)

    evaluation_result = EvaluationResult(
        key=self.metric.name, score=eval_output[self.metric.name]
    )
    if RUN_KEY in eval_output:
        evaluation_result.evaluator_info[RUN_KEY] = eval_output[RUN_KEY]
    return evaluation_result
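
Because EvaluatorChain also implements LangSmith's RunEvaluator interface, evaluate_run is normally not called by hand; instead the chain is passed as a custom evaluator and LangSmith invokes it for every run. The sketch below mirrors what ragas.integrations.langsmith.evaluate (documented further down) does internally; my_chain_factory is a hypothetical user-supplied factory, and import paths may differ across LangChain versions:

# Sketch: register EvaluatorChain instances as custom LangSmith evaluators.
from langchain.smith import RunEvalConfig
from langsmith import Client

from ragas.integrations.langchain import EvaluatorChain
from ragas.metrics import answer_relevancy

eval_config = RunEvalConfig(
    custom_evaluators=[EvaluatorChain(metric=answer_relevancy)]
)

client = Client()
client.run_on_dataset(
    dataset_name="basic_rag_testset",       # must already exist in LangSmith
    llm_or_chain_factory=my_chain_factory,  # hypothetical chain factory
    evaluation=eval_config,
)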

ragas.integrations.langsmith

upload_dataset

upload_dataset(
    dataset: Testset,
    dataset_name: str,
    dataset_desc: str = "",
) -> Dataset

Uploads a new dataset to LangSmith, converting it from a TestDataset object to a pandas DataFrame before upload. If a dataset with the specified name already exists, the function raises an error.

Parameters:

dataset : TestDataset
    The dataset to be uploaded. Required.
dataset_name : str
    The name for the new dataset in LangSmith. Required.
dataset_desc : str, optional
    A description for the new dataset. The default is an empty string.

Returns:

Dataset
    The dataset object as stored in LangSmith after upload.

Raises:

ValueError
    If a dataset with the specified name already exists in LangSmith.

Notes

The function attempts to read a dataset by the given name to check its existence. If not found, it proceeds to upload the dataset after converting it to a pandas DataFrame. This involves specifying input and output keys for the dataset being uploaded.

Source code in src/ragas/integrations/langsmith.py
def upload_dataset(
    dataset: Testset, dataset_name: str, dataset_desc: str = ""
) -> LangsmithDataset:
    """
    Uploads a new dataset to LangSmith, converting it from a TestDataset object to a
    pandas DataFrame before upload. If a dataset with the specified name already
    exists, the function raises an error.

    Parameters
    ----------
    dataset : TestDataset
        The dataset to be uploaded.
    dataset_name : str
        The name for the new dataset in LangSmith.
    dataset_desc : str, optional
        A description for the new dataset. The default is an empty string.

    Returns
    -------
    LangsmithDataset
        The dataset object as stored in LangSmith after upload.

    Raises
    ------
    ValueError
        If a dataset with the specified name already exists in LangSmith.

    Notes
    -----
    The function attempts to read a dataset by the given name to check its existence.
    If not found, it proceeds to upload the dataset after converting it to a pandas
    DataFrame. This involves specifying input and output keys for the dataset being
    uploaded.
    """
    client = Client()
    try:
        # check if dataset exists
        langsmith_dataset: LangsmithDataset = client.read_dataset(
            dataset_name=dataset_name
        )
        raise ValueError(
            f"Dataset {dataset_name} already exists in langsmith. [{langsmith_dataset}]"
        )
    except LangSmithNotFoundError:
        # if not create a new one with the generated query examples
        langsmith_dataset: LangsmithDataset = client.upload_dataframe(
            df=dataset.to_pandas(),
            name=dataset_name,
            input_keys=["question"],
            output_keys=["ground_truth"],
            description=dataset_desc,
        )

        print(
            f"Created a new dataset '{langsmith_dataset.name}'. Dataset is accessible at {langsmith_dataset.url}"
        )
        return langsmith_dataset
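
A minimal usage sketch, assuming LangSmith credentials are configured in the environment and that testset is a ragas Testset produced elsewhere (for example by a test set generator):

# Sketch: upload a generated test set to LangSmith.
from ragas.integrations.langsmith import upload_dataset

# `testset` is assumed to be a ragas Testset generated elsewhere.
langsmith_dataset = upload_dataset(
    dataset=testset,
    dataset_name="basic_rag_testset",
    dataset_desc="Synthetic RAG test set generated with ragas",
)
print(langsmith_dataset.url)  # link to the dataset in LangSmith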

evaluate

evaluate(
    dataset_name: str,
    llm_or_chain_factory: Any,
    experiment_name: Optional[str] = None,
    metrics: Optional[list] = None,
    verbose: bool = False,
) -> Dict[str, Any]

Evaluates a language model or a chain factory on a specified dataset using LangSmith, with the option to customize metrics and verbosity.

Parameters:

dataset_name : str
    The name of the dataset to use for evaluation. This dataset must exist in LangSmith. Required.
llm_or_chain_factory : Any
    The language model or chain factory to be evaluated. This parameter is flexible and can accept a variety of objects depending on the implementation. Required.
experiment_name : Optional[str], optional
    The name of the experiment. This can be used to categorize or identify the evaluation run within LangSmith. The default is None.
metrics : Optional[list], optional
    A list of custom metrics (functions or evaluators) to be used for the evaluation. If None, a default set of metrics (answer relevancy, context precision, context recall, and faithfulness) is used. The default is None.
verbose : bool, optional
    If True, detailed progress and results will be printed during the evaluation process. The default is False.

Returns:

Dict[str, Any]
    A dictionary containing the results of the evaluation.

Raises:

ValueError
    If the specified dataset does not exist in LangSmith.

See Also

Client.read_dataset : Method to read an existing dataset.
Client.run_on_dataset : Method to run the evaluation on the specified dataset.

Examples:

>>> results = evaluate(
...     dataset_name="MyDataset",
...     llm_or_chain_factory=my_llm,
...     experiment_name="experiment_1_with_vanila_rag",
...     verbose=True
... )
>>> print(results)
{'evaluation_result': ...}

Notes

The function initializes a client to interact with LangSmith, validates the existence of the specified dataset, prepares evaluation metrics, and runs the evaluation, returning the results. Custom evaluation metrics can be specified, or a default set will be used if none are provided.

Source code in src/ragas/integrations/langsmith.py
def evaluate(
    dataset_name: str,
    llm_or_chain_factory: t.Any,
    experiment_name: t.Optional[str] = None,
    metrics: t.Optional[list] = None,
    verbose: bool = False,
) -> t.Dict[str, t.Any]:
    """
    Evaluates a language model or a chain factory on a specified dataset using
    LangSmith, with the option to customize metrics and verbosity.

    Parameters
    ----------
    dataset_name : str
        The name of the dataset to use for evaluation. This dataset must exist in
        LangSmith.
    llm_or_chain_factory : Any
        The language model or chain factory to be evaluated. This parameter is
        flexible and can accept a variety of objects depending on the implementation.
    experiment_name : Optional[str], optional
        The name of the experiment. This can be used to categorize or identify the
        evaluation run within LangSmith. The default is None.
    metrics : Optional[list], optional
        A list of custom metrics (functions or evaluators) to be used for the
        evaluation. If None, a default set of metrics (answer relevancy, context
        precision, context recall, and faithfulness) are used.
        The default is None.
    verbose : bool, optional
        If True, detailed progress and results will be printed during the evaluation
        process.
        The default is False.

    Returns
    -------
    Dict[str, Any]
        A dictionary containing the results of the evaluation.

    Raises
    ------
    ValueError
        If the specified dataset does not exist in LangSmith.

    See Also
    --------
    Client.read_dataset : Method to read an existing dataset.
    Client.run_on_dataset : Method to run the evaluation on the specified dataset.

    Examples
    --------
    >>> results = evaluate(
    ...     dataset_name="MyDataset",
    ...     llm_or_chain_factory=my_llm,
    ...     experiment_name="experiment_1_with_vanila_rag",
    ...     verbose=True
    ... )
    >>> print(results)
    {'evaluation_result': ...}

    Notes
    -----
    The function initializes a client to interact with LangSmith, validates the existence
    of the specified dataset, prepares evaluation metrics, and runs the evaluation,
    returning the results. Custom evaluation metrics can be specified, or a default set
    will be used if none are provided.
    """
    # init client and validate dataset
    client = Client()
    try:
        _ = client.read_dataset(dataset_name=dataset_name)
    except LangSmithNotFoundError:
        raise ValueError(
            f"Dataset {dataset_name} not found in langsmith, make sure it exists in langsmith"
        )

    # make config
    if metrics is None:
        from ragas.metrics import (
            answer_relevancy,
            context_precision,
            context_recall,
            faithfulness,
        )

        metrics = [answer_relevancy, context_precision, faithfulness, context_recall]

    metrics = [EvaluatorChain(m) for m in metrics]
    eval_config = RunEvalConfig(
        custom_evaluators=metrics,
    )

    # run evaluation with langsmith
    run = client.run_on_dataset(
        dataset_name=dataset_name,
        llm_or_chain_factory=llm_or_chain_factory,
        evaluation=eval_config,
        verbose=verbose,
        # Any experiment metadata can be specified here
        project_name=experiment_name,
    )

    return run
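
The docstring example above uses the default metric set; custom metrics can also be passed explicitly. A sketch assuming the dataset uploaded earlier and a hypothetical user-defined build_rag_chain factory:

# Sketch: run a LangSmith evaluation with an explicit subset of ragas metrics.
from ragas.integrations.langsmith import evaluate
from ragas.metrics import answer_relevancy, faithfulness

results = evaluate(
    dataset_name="basic_rag_testset",
    llm_or_chain_factory=build_rag_chain,  # hypothetical chain factory
    experiment_name="experiment_2_custom_metrics",
    metrics=[faithfulness, answer_relevancy],
    verbose=True,
)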

ragas.integrations.llama_index

ragas.integrations.opik

OpikTracer

Bases: OpikTracer

Callback for Opik that can be used to log traces and evaluation scores to the Opik platform.

Attributes:

tags : list[string]
    The tags to set on each trace.
metadata : dict
    Additional metadata to log for each trace.
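
A minimal sketch of logging a ragas evaluation to Opik, assuming Opik credentials are configured and that eval_dataset is an evaluation dataset built elsewhere; the constructor arguments shown mirror the attributes above:

# Sketch: send ragas traces and scores to Opik via the tracer callback.
from ragas import evaluate
from ragas.integrations.opik import OpikTracer
from ragas.metrics import faithfulness

opik_tracer = OpikTracer(
    tags=["ragas"],                        # tags set on each trace
    metadata={"experiment": "rag-demo"},   # extra metadata per trace
)

results = evaluate(
    dataset=eval_dataset,        # assumed ragas evaluation dataset
    metrics=[faithfulness],
    callbacks=[opik_tracer],     # traces and scores are logged to Opik
)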

ragas.integrations.helicone