Source code for dask_ml.metrics.classification

from typing import Optional

import dask
import dask.array as da
import numpy as np
import sklearn.metrics
import sklearn.utils.multiclass

from .._typing import ArrayLike


def accuracy_score(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    normalize: bool = True,
    sample_weight: Optional[ArrayLike] = None,
    compute: bool = True,
) -> ArrayLike:
    """Accuracy classification score.

    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.

    Read more in the :ref:`User Guide <accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array
        Ground truth (correct) labels.

    y_pred : 1d array-like, or label indicator array
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.

    sample_weight : 1d array-like, optional
        Sample weights.

        .. versionadded:: 0.7.0

    compute : bool, optional (default=True)
        If ``True``, compute the result immediately; otherwise return a
        lazy, zero-dimensional dask Array.

    Returns
    -------
    score : scalar
        If ``normalize == True``, return the fraction of correctly
        classified samples (float), else return the number of correctly
        classified samples (int). A dask Array is returned instead when
        ``compute`` is ``False``.

        The best performance is 1 with ``normalize == True`` and the
        number of samples with ``normalize == False``.

    Notes
    -----
    In binary and multiclass classification, this function is equal
    to the ``jaccard_similarity_score`` function.

    Examples
    --------
    >>> import dask.array as da
    >>> import numpy as np
    >>> from dask_ml.metrics import accuracy_score
    >>> y_pred = da.from_array(np.array([0, 2, 1, 3]), chunks=2)
    >>> y_true = da.from_array(np.array([0, 1, 2, 3]), chunks=2)
    >>> accuracy_score(y_true, y_pred)
    0.5
    >>> accuracy_score(y_true, y_pred, compute=False)
    dask.array<mean_agg-aggregate, shape=(), dtype=float64, chunksize=()>
    >>> _.compute()
    0.5
    >>> accuracy_score(y_true, y_pred, normalize=False)
    2

    In the multilabel case with binary label indicators:

    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
    0.5
    """
    if y_true.ndim > 1:
        # Subset accuracy: a sample counts as correct only when every
        # label matches.
        differing_labels = ((y_true - y_pred) == 0).all(1)
        score = differing_labels != 0
    else:
        score = y_true == y_pred

    if normalize:
        score = da.average(score, weights=sample_weight)
    elif sample_weight is not None:
        score = da.dot(score, sample_weight)
    else:
        score = score.sum()

    if compute:
        score = score.compute()
    return score
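A short usage sketch (illustrative, not part of the module source) combining ``sample_weight`` with the ``compute`` flag; the arrays and weights below are made up for the example:

# Illustrative sketch: weighted accuracy on chunked arrays, kept lazy
# with compute=False until explicitly computed.
import dask.array as da
import numpy as np
from dask_ml.metrics import accuracy_score

y_true = da.from_array(np.array([0, 1, 2, 3]), chunks=2)
y_pred = da.from_array(np.array([0, 2, 1, 3]), chunks=2)
weights = da.from_array(np.array([1.0, 1.0, 1.0, 3.0]), chunks=2)

# A 0-d dask array; nothing has been computed yet.
lazy = accuracy_score(y_true, y_pred, sample_weight=weights, compute=False)
# Matches at positions 0 and 3: (1 + 3) / (1 + 1 + 1 + 3) = 2/3.
print(lazy.compute())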
def _log_loss_inner(
    x: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike], **kwargs
):
    # da.map_blocks wasn't able to concatenate together the results
    # when we reduce down to a scalar per block. So we make an
    # array with 1 element.
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    return np.array(
        [sklearn.metrics.log_loss(x, y, sample_weight=sample_weight, **kwargs)]
    )
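A minimal check (illustrative only) of what the helper returns for a single block: the per-block score is wrapped in a length-1 array so ``da.map_blocks`` can stitch the block results into a 1-d dask array. The block values here are assumptions for the demo:

# Illustrative: one block of labels and predicted probabilities.
import numpy as np

y_true_block = np.array([0, 1, 1, 0])
y_prob_block = np.array([[0.9, 0.1], [0.2, 0.8], [0.3, 0.7], [0.6, 0.4]])
out = _log_loss_inner(y_true_block, y_prob_block, None, labels=[0, 1])
print(out.shape)  # (1,) -- one scalar per block, as a 1-element array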
def log_loss(
    y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None, labels=None
):
    if not (dask.is_dask_collection(y_true) and dask.is_dask_collection(y_pred)):
        return sklearn.metrics.log_loss(
            y_true,
            y_pred,
            eps=eps,
            normalize=normalize,
            sample_weight=sample_weight,
            labels=labels,
        )

    if y_pred.ndim > 1 and y_true.ndim == 1:
        y_true = y_true.reshape(-1, 1)
        drop_axis: Optional[int] = 1
        if sample_weight is not None:
            sample_weight = sample_weight.reshape(-1, 1)
    else:
        drop_axis = None

    result = da.map_blocks(
        _log_loss_inner,
        y_true,
        y_pred,
        sample_weight,
        chunks=(1,),
        drop_axis=drop_axis,
        dtype="f8",
        eps=eps,
        normalize=normalize,
        labels=labels,
    )
    if normalize and sample_weight is not None:
        sample_weight = sample_weight.ravel()
        block_weights = sample_weight.map_blocks(np.sum, chunks=(1,), keepdims=True)
        return da.average(result, 0, weights=block_weights)
    elif normalize:
        return result.mean()
    else:
        return result.sum()
log_loss.__doc__ = getattr(sklearn.metrics.log_loss, "__doc__")
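A hedged usage sketch (not from the library source) of the dask path: in-memory inputs dispatch straight to scikit-learn, while dask collections are scored blockwise and the per-block results aggregated. The data below is synthetic:

# Illustrative: blockwise log loss on chunked probability predictions.
import dask.array as da
import numpy as np
from dask_ml.metrics import log_loss

rng = np.random.RandomState(0)
y_true = da.from_array(rng.randint(0, 2, size=100), chunks=50)
p = rng.uniform(size=100)
y_prob = da.from_array(np.stack([1 - p, p], axis=1), chunks=(50, 2))

# Each 50-row block is scored with sklearn.metrics.log_loss, then the
# per-block results are averaged. Passing ``labels`` guards against a
# block that happens to contain only one class.
print(log_loss(y_true, y_prob, labels=[0, 1]).compute())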