1,111 changes: 1,111 additions & 0 deletions examples/cookbook/metrics.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/evidently/future/metric_types.py
@@ -222,7 +222,7 @@ def labels(self) -> List[Label]:
         return list(self.values.keys())
 
     def get_label_result(self, label: Label) -> SingleValue:
-        value = SingleValue(self.values[label])
+        value = SingleValue(self.values[str(label)])
         metric = self.metric
         value._metric = metric
         if not isinstance(metric, ByLabelCalculation):
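The `str(label)` cast matches how the per-label values dict ends up keyed once results pass through serialization. A minimal sketch of the assumed motivation (hypothetical values, not Evidently's internals): after a JSON round trip, integer dict keys come back as strings, so an integer label only resolves if it is cast on lookup.

import json

# Hypothetical per-label results keyed by integer class labels.
values = {0: 0.91, 1: 0.87}

# JSON has no integer keys, so a round trip stringifies them.
round_tripped = json.loads(json.dumps(values))
print(round_tripped)  # {'0': 0.91, '1': 0.87}

label = 0
print(round_tripped[str(label)])  # 0.91 -- works for int and str labels alike
# round_tripped[label]            # would raise KeyError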
2 changes: 2 additions & 0 deletions src/evidently/metric_results.py
@@ -99,6 +99,7 @@ class PredictionData(MetricResult):
     class Config:
         type_alias = "evidently:metric_result:PredictionData"
         dict_include = False
+        smart_union = True
 
     predictions: pd.Series
     labels: LabelList
@@ -151,6 +152,7 @@ class Config:
         dict_exclude_fields = {"task", "target_type"}
         pd_include = False
         tags = {IncludeTags.Parameter}
+        smart_union = True
 
     utility_columns: DatasetUtilityColumns
     target_type: Optional[str]
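For context on `smart_union` (pydantic v1 behavior, shown on a minimal hypothetical model rather than the actual PredictionData fields): without it, a `Union[int, str]` field coerces "1" to the first matching union member; with it, the exact type is preferred, so str and int labels are not silently converted.

from typing import Union

from pydantic import BaseModel  # pydantic v1-style Config


class Plain(BaseModel):
    label: Union[int, str]


class Smart(BaseModel):
    class Config:
        smart_union = True

    label: Union[int, str]


print(Plain(label="1").label)  # 1 -- coerced to int, the first union member
print(Smart(label="1").label)  # '1' -- exact type preserved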
@@ -30,6 +30,7 @@ class Config:
             "reference_matrix": {IncludeTags.Reference},
             "target_names": {IncludeTags.Parameter},
         }
+        smart_union = True
 
     current_matrix: ConfusionMatrix
     reference_matrix: Optional[ConfusionMatrix]
5 changes: 3 additions & 2 deletions src/evidently/metrics/classification_performance/objects.py
@@ -26,6 +26,7 @@ class Config:
 class ClassificationReport(MetricResult):
     class Config:
         type_alias = "evidently:metric_result:ClassificationReport"
+        smart_union = True
 
     classes: ClassesMetrics
     accuracy: float
@@ -59,6 +60,6 @@ def create(
             if not isinstance(v, dict):
                 continue
             v["f1"] = v.pop("f1-score")
-        class_metrics = {k: parse_obj_as(ClassMetric, report[str(k)]) for k in classes}
-        other = {k: v for k, v in report.items() if k not in [str(cl) for cl in classes]}
+        class_metrics = {str(k): parse_obj_as(ClassMetric, report[str(k)]) for k in classes}
+        other = {str(k): v for k, v in report.items() if k not in [str(cl) for cl in classes]}
         return parse_obj_as(cls, {"classes": class_metrics, **other})
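Normalizing the class keys with `str(k)` lines up with how `sklearn.metrics.classification_report(..., output_dict=True)` already returns its per-class entries. A small sketch of that behavior (toy labels, independent of this PR's code):

from sklearn.metrics import classification_report

y_true = [0, 1, 1, 0]
y_pred = [0, 1, 0, 0]

report = classification_report(y_true, y_pred, output_dict=True)
# Per-class entries are keyed by str(label), next to "accuracy", "macro avg", "weighted avg".
print(sorted(report))  # ['0', '1', 'accuracy', 'macro avg', 'weighted avg']

classes = [0, 1]
per_class = {str(k): report[str(k)] for k in classes}  # int labels must be cast for lookup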
@@ -29,6 +29,7 @@
 class ClassificationQuality(MetricResult):
     class Config:
         type_alias = "evidently:metric_result:ClassificationQuality"
+        smart_union = True
 
     metrics: ClassesMetrics
     roc_aucs: Optional[List[float]]
@@ -46,6 +47,7 @@ class Config:
             "reference": {IncludeTags.Reference},
             "columns": {IncludeTags.Parameter},
         }
+        smart_union = True
 
     columns: DatasetColumns
     current: ClassificationQuality
@@ -97,7 +99,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByClassResult:
                 binaraized_target, prediction.prediction_probas, average=None
             ).tolist()
             for idx, item in enumerate(list(prediction.prediction_probas.columns)):
-                metrics_matrix[item].roc_auc = current_roc_aucs[idx]
+                metrics_matrix[str(item)].roc_auc = current_roc_aucs[idx]
         reference_roc_aucs = None
 
         reference = None
@@ -118,7 +120,7 @@ def calculate(self, data: InputData) -> ClassificationQualityByClassResult:
                     binaraized_target, ref_prediction.prediction_probas, average=None
                 ).tolist()
                 for idx, item in enumerate(list(ref_prediction.prediction_probas.columns)):
-                    ref_metrics[item].roc_auc = reference_roc_aucs[idx]
+                    ref_metrics[str(item)].roc_auc = reference_roc_aucs[idx]
             reference = ClassificationQuality(metrics=ref_metrics, roc_aucs=reference_roc_aucs)
         return ClassificationQualityByClassResult(
             columns=columns,
@@ -141,7 +143,7 @@ def render_html(self, obj: ClassificationQualityByClass) -> List[BaseWidgetInfo]
         names = metrics_frame.columns.tolist()
         if columns.target_names is not None and isinstance(columns.target_names, dict):
             # todo: refactor columns data typing
-            names = [columns.target_names[int(x)] for x in names]  # type: ignore
+            names = [columns.target_names.get(x) or columns.target_names.get(int(x)) for x in names]  # type: ignore
         z = metrics_frame.iloc[:-1].values
         x = list(map(str, names))
         y = ["precision", "recall", "f1-score"]
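The widened lookup in `render_html` tries both key types because, depending on how the column mapping was provided (and after serialization), `target_names` may be keyed by int or by str, while the metrics frame columns arrive as strings. A minimal standalone sketch with hypothetical data:

# target_names as provided in the column mapping; could equally be {"0": "cat", "1": "dog"}.
target_names = {0: "cat", 1: "dog"}
names = ["0", "1"]  # metrics frame columns are strings

# Try the string key first, then fall back to the integer key.
display = [target_names.get(x) or target_names.get(int(x)) for x in names]
print(display)  # ['cat', 'dog']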
@@ -1,5 +1,7 @@
 from typing import Dict
 from typing import List
+from typing import Optional
+from typing import Union
 
 import pandas as pd
 from sklearn import metrics
@@ -13,6 +15,7 @@
 from evidently.metric_results import ROCCurve
 from evidently.metric_results import ROCCurveData
 from evidently.model.widget import BaseWidgetInfo
+from evidently.pipeline.column_mapping import TargetNames
 from evidently.renderers.base_renderer import MetricRenderer
 from evidently.renderers.base_renderer import default_renderer
 from evidently.renderers.html_widgets import TabData
@@ -46,18 +49,35 @@ def calculate(self, data: InputData) -> ClassificationRocCurveResults:
         curr_predictions = get_prediction_data(data.current_data, dataset_columns, data.column_mapping.pos_label)
         if curr_predictions.prediction_probas is None:
             raise ValueError("Roc Curve can be calculated only on binary probabilistic predictions")
-        curr_roc_curve = self.calculate_metrics(data.current_data[target_name], curr_predictions)
+        curr_roc_curve = self.calculate_metrics(
+            data.current_data[target_name], curr_predictions, dataset_columns.target_names
+        )
         ref_roc_curve = None
         if data.reference_data is not None:
             ref_predictions = get_prediction_data(data.reference_data, dataset_columns, data.column_mapping.pos_label)
-            ref_roc_curve = self.calculate_metrics(data.reference_data[target_name], ref_predictions)
+            ref_roc_curve = self.calculate_metrics(
+                data.reference_data[target_name], ref_predictions, dataset_columns.target_names
+            )
         return ClassificationRocCurveResults(
             current_roc_curve=curr_roc_curve,
             reference_roc_curve=ref_roc_curve,
         )
 
-    def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData) -> ROCCurve:
+    def calculate_metrics(
+        self,
+        target_data: pd.Series,
+        prediction: PredictionData,
+        target_names: Optional[TargetNames],
+    ) -> ROCCurve:
         labels = prediction.labels
+        tn: Dict[Union[int, str], str] = {}
+        if target_names is None:
+            tn = {}
+        elif isinstance(target_names, list):
+            tn = {idx: value for idx, value in enumerate(target_names)}
+        elif isinstance(target_names, dict):
+            tn = target_names
+
         if prediction.prediction_probas is None:
             raise ValueError("Roc Curve can be calculated only on binary probabilistic predictions")
         binaraized_target = (target_data.to_numpy().reshape(-1, 1) == labels).astype(int)
@@ -75,8 +95,9 @@ def calculate_metrics(self, target_data: pd.Series, prediction: PredictionData)
             binaraized_target.columns = labels
 
         for label in labels:
+            mapped_label = tn.get(label, label)
             fpr, tpr, thrs = metrics.roc_curve(binaraized_target[label], prediction.prediction_probas[label])
-            roc_curve[label] = ROCCurveData(fpr=fpr.tolist(), tpr=tpr.tolist(), thrs=thrs.tolist())
+            roc_curve[mapped_label] = ROCCurveData(fpr=fpr.tolist(), tpr=tpr.tolist(), thrs=thrs.tolist())
         return roc_curve
 
 
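The new `tn` mapping normalizes `target_names` (a list or a dict in the column mapping) into a label-to-display-name dict, with unmapped labels falling back to themselves. A standalone sketch of that normalization (hypothetical helper name, same logic):

from typing import Dict, List, Optional, Union


def build_name_map(target_names: Optional[Union[List[str], Dict[Union[int, str], str]]]) -> Dict[Union[int, str], str]:
    """Turn a target_names list (index -> name) or dict (label -> name) into one lookup dict."""
    if target_names is None:
        return {}
    if isinstance(target_names, list):
        return {idx: value for idx, value in enumerate(target_names)}
    return dict(target_names)


tn = build_name_map(["negative", "positive"])
for label in [0, 1, 2]:
    print(tn.get(label, label))  # negative, positive, 2 (unmapped labels pass through)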
6 changes: 3 additions & 3 deletions src/evidently/tests/classification_performance_tests.py
@@ -573,7 +573,7 @@ def conf_matrix(self):
         return self._conf_matrix
 
     def calculate_value_for_test(self) -> Optional[Any]:
-        return self.get_value(self.by_class_metric.get_result().current.metrics[self.label])
+        return self.get_value(self.by_class_metric.get_result().current.metrics[str(self.label)])
 
     def get_condition(self) -> TestValueCondition:
         if self.condition.has_condition():
@@ -583,9 +583,9 @@ def get_condition(self) -> TestValueCondition:
         ref_metrics = result.reference.metrics if result.reference is not None else None
 
         if ref_metrics is not None:
-            return TestValueCondition(eq=approx(self.get_value(ref_metrics[self.label]), relative=0.2))
+            return TestValueCondition(eq=approx(self.get_value(ref_metrics[str(self.label)]), relative=0.2))
 
-        dummy_result = self.dummy_metric.get_result().metrics_matrix[self.label]
+        dummy_result = self.dummy_metric.get_result().metrics_matrix[str(self.label)]
 
         if self.get_value(dummy_result) is None:
             raise ValueError("Neither required test parameters nor reference data has been provided.")
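For the auto-generated condition, `eq=approx(value, relative=0.2)` accepts test values within roughly a ±20% band around the reference metric. A rough sketch of that check (hypothetical helper, not Evidently's `approx` implementation):

def within_relative(value: float, reference: float, relative: float = 0.2) -> bool:
    # Accept values inside reference +/- reference * relative.
    return abs(value - reference) <= abs(reference) * relative


print(within_relative(0.75, reference=0.8))  # True  (within 20% of 0.8)
print(within_relative(0.50, reference=0.8))  # False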