Module ethik.classification_explainer
Expand source code
import colorlover as cl
import pandas as pd
from plotly.subplots import make_subplots
from .explainer import Explainer
from .utils import to_pandas
__all__ = ["ClassificationExplainer"]
class ClassificationExplainer(Explainer):
    """Explainer specialised for classification predictions.

    When `y_pred` holds a single column (binary case) the work is delegated
    to the parent `Explainer`; when it holds one column per class label, a
    vertically-stacked sub-plot is drawn for each label.
    """

    def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None):
        """Plot the bias for the features in `X_test`.

        See `ethik.explainer.Explainer.plot_bias()`.
        """
        if yrange is None:
            yrange = [0, 1]

        X_test = pd.DataFrame(to_pandas(X_test))
        y_pred = pd.DataFrame(to_pandas(y_pred))

        # Single prediction column: nothing multi-label to do, defer to the
        # parent implementation entirely.
        if len(y_pred.columns) == 1:
            return super().plot_bias(
                X_test, y_pred.iloc[:, 0], colors=colors, yrange=yrange, size=size
            )

        if colors is None:
            features = X_test.columns
            # Skip the lightest color as it is too light
            palette = cl.interp(cl.scales["10"]["qual"]["Paired"], len(features) + 1)[1:]
            colors = dict(zip(features, palette))

        labels = y_pred.columns

        # NOTE: zero-argument super() is not usable inside a comprehension,
        # so bind the parent method first.
        parent_plot_bias = super().plot_bias
        label_plots = [
            parent_plot_bias(X_test, y_pred[label], colors=colors, yrange=yrange)
            for label in labels
        ]

        # One row per class label, all sharing the same x-axis.
        fig = make_subplots(rows=len(labels), cols=1, shared_xaxes=True)
        for row, (label, label_plot) in enumerate(zip(labels, label_plots), start=1):
            fig.update_layout({f"yaxis{row}": dict(title=f"Average {label}")})
            for trace in label_plot["data"]:
                # Display each legend entry only once (on the first row) and
                # group traces by name so toggling one entry hides the trace
                # on every row at once.
                trace["showlegend"] = row == 1 and trace["showlegend"]
                trace["legendgroup"] = trace["name"]
                fig.add_trace(trace, row=row, col=1)

        width = height = None
        if size is not None:
            width, height = size

        fig.update_xaxes(
            nticks=5,
            showline=True,
            showgrid=True,
            zeroline=False,
            linecolor="black",
            gridcolor="#eee",
        )
        fig.update_yaxes(
            range=yrange,
            showline=True,
            showgrid=True,
            linecolor="black",
            gridcolor="#eee",
        )

        # Only the bottom x-axis carries a title; its meaning depends on how
        # many features are plotted (tau values vs. a single feature average).
        if len(X_test.columns) > 1:
            xaxis_title = "tau"
        else:
            xaxis_title = f"Average {X_test.columns[0]}"

        fig.update_layout(
            {
                f"xaxis{len(labels)}": dict(title=xaxis_title),
                "plot_bgcolor": "white",
                "width": width,
                "height": height,
            }
        )
        return fig
Classes
class ClassificationExplainer (alpha=0.05, n_taus=41, n_samples=1, sample_frac=0.8, conf_level=0.05, max_iterations=15, tol=0.001, n_jobs=1, memoize=False, verbose=True)
-
Explains the bias and reliability of model predictions.
Parameters
alpha: float
    A float between 0 and 0.5 which indicates how closely the Explainer
    should look at extreme values of a distribution. The closer to zero,
    the more extreme values will be accounted for. The default is 0.05,
    which means that all values beyond the 5th and 95th quantiles are
    ignored.
n_taus: int
    The number of τ values to consider. The results will be more
    fine-grained the higher this value is. However, the computation time
    increases linearly with n_taus. The default is 41 and corresponds to
    each τ being separated from its neighbors by 0.05.
n_samples: int
    The number of samples to use for the confidence interval. If 1, the
    default, no confidence interval is computed.
sample_frac: float
    The proportion of lines in the dataset sampled to generate the samples
    for the confidence interval. If n_samples is 1, no confidence interval
    is computed and the whole dataset is used. Default is 0.8.
conf_level: float
    A float between 0 and 0.5 which indicates the quantile used for the
    confidence interval. Default is 0.05, which means that the confidence
    interval contains the data between the 5th and 95th quantiles.
max_iterations: int
    The maximum number of iterations used when applying the Newton step of
    the optimization procedure. Default is 15.
tol: float
    The bottom threshold for the gradient of the optimization procedure.
    When reached, the procedure stops. Otherwise, a warning is raised about
    the fact that the optimization did not converge. Default is 1e-3.
n_jobs: int
    The number of jobs to use for parallel computations. See
    joblib.Parallel(). Default is 1.
memoize: bool
    Indicates whether or not memoization should be used. If True, then
    intermediate results will be stored in order to avoid recomputing
    results that can be reused by successively called methods. For example,
    if you call plot_bias followed by plot_bias_ranking and memoize is
    True, then the intermediate results required by plot_bias will be
    reused for plot_bias_ranking. Memoization is turned off by default
    because it can lead to unexpected behavior depending on your usage.
verbose: bool
    Whether or not to show progress bars during computations. Default is
    True.
Expand source code
class ClassificationExplainer(Explainer):
    def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None):
        """Plot the bias for the features in `X_test`.

        See `ethik.explainer.Explainer.plot_bias()`.
        """
        if yrange is None:
            yrange = [0, 1]
        X_test = pd.DataFrame(to_pandas(X_test))
        y_pred = pd.DataFrame(to_pandas(y_pred))
        # Binary case: a single prediction column, delegate to the parent.
        if len(y_pred.columns) == 1:
            return super().plot_bias(
                X_test, y_pred.iloc[:, 0], colors=colors, yrange=yrange, size=size
            )
        if colors is None:
            features = X_test.columns
            # Skip the lightest color as it is too light
            scale = cl.interp(cl.scales["10"]["qual"]["Paired"], len(features) + 1)[1:]
            colors = {feat: scale[i] for i, feat in enumerate(features)}
        labels = y_pred.columns
        plots = []
        for label in labels:
            plots.append(
                super().plot_bias(X_test, y_pred[label], colors=colors, yrange=yrange)
            )
        # One stacked sub-plot per class label, sharing the x-axis.
        fig = make_subplots(rows=len(labels), cols=1, shared_xaxes=True)
        for ilabel, (label, plot) in enumerate(zip(labels, plots)):
            fig.update_layout({f"yaxis{ilabel+1}": dict(title=f"Average {label}")})
            for trace in plot["data"]:
                # Legend entries appear once (first row) and are grouped by
                # name so toggling affects every row.
                trace["showlegend"] = ilabel == 0 and trace["showlegend"]
                trace["legendgroup"] = trace["name"]
                fig.add_trace(trace, row=ilabel + 1, col=1)
        width = height = None
        if size is not None:
            width, height = size
        fig.update_xaxes(
            nticks=5,
            showline=True,
            showgrid=True,
            zeroline=False,
            linecolor="black",
            gridcolor="#eee",
        )
        fig.update_yaxes(
            range=yrange,
            showline=True,
            showgrid=True,
            linecolor="black",
            gridcolor="#eee",
        )
        fig.update_layout(
            {
                f"xaxis{len(labels)}": dict(
                    title="tau"
                    if len(X_test.columns) > 1
                    else f"Average {X_test.columns[0]}"
                ),
                "plot_bgcolor": "white",
                "width": width,
                "height": height,
            }
        )
        return fig
Ancestors
Methods
def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None)
-
Plot the bias for the features in
X_test
.Expand source code
def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None):
    """Plot the bias for the features in `X_test`.

    See `ethik.explainer.Explainer.plot_bias()`.
    """
    if yrange is None:
        yrange = [0, 1]
    X_test = pd.DataFrame(to_pandas(X_test))
    y_pred = pd.DataFrame(to_pandas(y_pred))
    # Binary case: a single prediction column, delegate to the parent.
    if len(y_pred.columns) == 1:
        return super().plot_bias(
            X_test, y_pred.iloc[:, 0], colors=colors, yrange=yrange, size=size
        )
    if colors is None:
        features = X_test.columns
        # Skip the lightest color as it is too light
        scale = cl.interp(cl.scales["10"]["qual"]["Paired"], len(features) + 1)[1:]
        colors = {feat: scale[i] for i, feat in enumerate(features)}
    labels = y_pred.columns
    plots = []
    for label in labels:
        plots.append(
            super().plot_bias(X_test, y_pred[label], colors=colors, yrange=yrange)
        )
    # One stacked sub-plot per class label, sharing the x-axis.
    fig = make_subplots(rows=len(labels), cols=1, shared_xaxes=True)
    for ilabel, (label, plot) in enumerate(zip(labels, plots)):
        fig.update_layout({f"yaxis{ilabel+1}": dict(title=f"Average {label}")})
        for trace in plot["data"]:
            # Legend entries appear once (first row) and are grouped by name
            # so toggling affects every row.
            trace["showlegend"] = ilabel == 0 and trace["showlegend"]
            trace["legendgroup"] = trace["name"]
            fig.add_trace(trace, row=ilabel + 1, col=1)
    width = height = None
    if size is not None:
        width, height = size
    fig.update_xaxes(
        nticks=5,
        showline=True,
        showgrid=True,
        zeroline=False,
        linecolor="black",
        gridcolor="#eee",
    )
    fig.update_yaxes(
        range=yrange,
        showline=True,
        showgrid=True,
        linecolor="black",
        gridcolor="#eee",
    )
    fig.update_layout(
        {
            f"xaxis{len(labels)}": dict(
                title="tau"
                if len(X_test.columns) > 1
                else f"Average {X_test.columns[0]}"
            ),
            "plot_bgcolor": "white",
            "width": width,
            "height": height,
        }
    )
    return fig
Inherited members