Module ethik.classification_explainer

Expand source code
import colorlover as cl
import pandas as pd
from plotly.subplots import make_subplots

from .explainer import Explainer
from .utils import to_pandas

__all__ = ["ClassificationExplainer"]


class ClassificationExplainer(Explainer):
    def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None):
        """Plot the bias for the features in `X_test`.

        See `ethik.explainer.Explainer.plot_bias()`.
        """
        if yrange is None:
            yrange = [0, 1]

        X_test = pd.DataFrame(to_pandas(X_test))
        y_pred = pd.DataFrame(to_pandas(y_pred))

        if len(y_pred.columns) == 1:
            return super().plot_bias(
                X_test, y_pred.iloc[:, 0], colors=colors, yrange=yrange, size=size
            )

        if colors is None:
            features = X_test.columns
            #  Skip the lightest color as it is too light
            scale = cl.interp(cl.scales["10"]["qual"]["Paired"], len(features) + 1)[1:]
            colors = {feat: scale[i] for i, feat in enumerate(features)}

        labels = y_pred.columns
        plots = []
        for label in labels:
            plots.append(
                super().plot_bias(X_test, y_pred[label], colors=colors, yrange=yrange)
            )

        fig = make_subplots(rows=len(labels), cols=1, shared_xaxes=True)
        for ilabel, (label, plot) in enumerate(zip(labels, plots)):
            fig.update_layout({f"yaxis{ilabel+1}": dict(title=f"Average {label}")})
            for trace in plot["data"]:
                trace["showlegend"] = ilabel == 0 and trace["showlegend"]
                trace["legendgroup"] = trace["name"]
                fig.add_trace(trace, row=ilabel + 1, col=1)

        width = height = None
        if size is not None:
            width, height = size

        fig.update_xaxes(
            nticks=5,
            showline=True,
            showgrid=True,
            zeroline=False,
            linecolor="black",
            gridcolor="#eee",
        )
        fig.update_yaxes(
            range=yrange,
            showline=True,
            showgrid=True,
            linecolor="black",
            gridcolor="#eee",
        )
        fig.update_layout(
            {
                f"xaxis{len(labels)}": dict(
                    title="tau"
                    if len(X_test.columns) > 1
                    else f"Average {X_test.columns[0]}"
                ),
                "plot_bgcolor": "white",
                "width": width,
                "height": height,
            }
        )
        return fig

Classes

class ClassificationExplainer (alpha=0.05, n_taus=41, n_samples=1, sample_frac=0.8, conf_level=0.05, max_iterations=15, tol=0.001, n_jobs=1, memoize=False, verbose=True)

Explains the bias and reliability of model predictions.

Parameters

alpha : float
A float between 0 and 0.5 which indicates how closely the Explainer should look at the extreme values of a distribution. The closer to zero, the more the extreme values are taken into account. The default is 0.05, which means that all values beyond the 5th and 95th quantiles are ignored.
n_taus : int
The number of τ values to consider. The higher this value, the more fine-grained the results, but the computation time also increases linearly with n_taus. The default is 41, which corresponds to each τ being separated from its neighbors by 0.05.
n_samples : int
The number of samples to use for the confidence interval. If 1, the default, no confidence interval is computed.
sample_frac : float
The proportion of rows sampled from the dataset to build each sample for the confidence interval. If n_samples is 1, no confidence interval is computed and the whole dataset is used. Default is 0.8.
conf_level : float
A float between 0 and 0.5 which indicates the quantile used for the confidence interval. Default is 0.05, which means that the confidence interval contains the data between the 5th and 95th quantiles.
max_iterations : int
The maximum number of iterations used when applying the Newton step of the optimization procedure. Default is 15.
tol : float
The threshold for the gradient of the optimization procedure: once the gradient falls below this value, the procedure stops. If this threshold is never reached, a warning is raised to indicate that the optimization did not converge. Default is 1e-3.
n_jobs : int
The number of jobs to use for parallel computations. See joblib.Parallel(). Default is 1.
memoize : bool
Indicates whether or not memoization should be used. If True, intermediate results will be stored in order to avoid recomputing results that can be reused by successively called methods. For example, if you call plot_bias followed by plot_bias_ranking and memoize is True, then the intermediate results required by plot_bias will be reused for plot_bias_ranking. Memoization is turned off by default because it can lead to unexpected behavior depending on your usage.
verbose : bool
Whether or not to show progress bars during computations. Default is True.
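Below is a minimal usage sketch. The scikit-learn dataset, model and train/test split are illustrative assumptions and are not part of this module; only ClassificationExplainer and its parameters come from the source above.

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from ethik.classification_explainer import ClassificationExplainer

# Hypothetical setup: any fitted probabilistic classifier works.
X, y = load_iris(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# One column of predicted probabilities per class.
y_pred = pd.DataFrame(
    model.predict_proba(X_test), columns=model.classes_, index=X_test.index
)

explainer = ClassificationExplainer(
    alpha=0.05,     # ignore values beyond the 5th and 95th quantiles
    n_taus=41,      # tau resolution (steps of 0.05)
    n_samples=30,   # more than 1 sample enables the confidence interval
    sample_frac=0.8,
    conf_level=0.05,
    n_jobs=1,
)
fig = explainer.plot_bias(X_test, y_pred, size=(900, 600))
fig.show()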

Ancestors

ethik.explainer.Explainer

Methods

def plot_bias(self, X_test, y_pred, colors=None, yrange=None, size=None)

Plot the bias for the features in X_test.

See Explainer.plot_bias().


Inherited members