Comparison#

Learn how to easily compare plots from different models.

  • Compare two models by plotting all values: plot1 + plot2

  • Compare the performance of two models by plotting their difference: plot2 - plot1

Confusion matrix#

Added in sklearn-evaluation version 0.7.2

import matplotlib
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn_evaluation import plot
# Use a larger square figure and bigger text for every plot on this page.
matplotlib.rcParams.update({"figure.figsize": (7, 7), "font.size": 18})

# Synthetic 3-class dataset: 1000 samples, 20 features, 10 of them informative.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    class_sep=0.80,
    n_classes=3,
    random_state=0,
)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# Fit a decision tree, then a random forest, on the training split and keep
# each model's predicted labels for the test split.
tree_pred = DecisionTreeClassifier().fit(X_train, y_train).predict(X_test)
forest_pred = RandomForestClassifier().fit(X_train, y_train).predict(X_test)

Decision tree confusion matrix#

# Confusion matrix plot for the decision tree, built from the true test labels
# and the tree's predictions (normalize=False: presumably raw counts; confirm).
tree_cm = plot.ConfusionMatrix.from_raw_data(y_test, tree_pred, normalize=False)
../_images/f65924917ce0961170f6b3992e3a57d6a06c9335bc01fd7a90fb7ab237cf852c.png

Random forest confusion matrix#

# Confusion matrix plot for the random forest, built from the true test labels
# and the forest's predictions (normalize=False: presumably raw counts; confirm).
forest_cm = plot.ConfusionMatrix.from_raw_data(y_test, forest_pred, normalize=False)
../_images/ad549ba3d5173eba81c3e2d8121da08dbc94b6b1fd2cdf6df59716cc36e546e0.png

Compare confusion matrices#

# Adding two plots compares the models by plotting all values from both.
compare = tree_cm + forest_cm
../_images/ecf298c87aef591705dd99e4623b532e8b98252675b2440e1fd1a1040044d2bc.png
# Subtracting plots compares performance: random forest minus decision tree.
diff = forest_cm - tree_cm
../_images/228bc91e4f5f09a967e6e98bd4c8f28b330e9360aa85779615b819ac6ee052c6.png

ROC#

# Fit logistic regression, then a random forest, on the training split and
# keep each model's predict_proba output for the test split.
logistic_score = LogisticRegression().fit(X_train, y_train).predict_proba(X_test)
forest_score = RandomForestClassifier().fit(X_train, y_train).predict_proba(X_test)

Logistic regression ROC#

# ROC plot for logistic regression, built from the true test labels and the
# model's predict_proba scores.
logistic_roc = plot.ROC.from_raw_data(y_test, logistic_score)
../_images/596fca67be0e175f83578f34c236e313d4749e6031d43e42ad2f0e8e0907e6c6.png

Random forest ROC#

# ROC plot for the random forest, built from the true test labels and the
# model's predict_proba scores.
forest_roc = plot.ROC.from_raw_data(y_test, forest_score)
../_images/5a905259c597cdb22d5c86320bd43afb5c62cd798174bcf44d2cb5fb34f0887d.png

Compare ROC#

# Adding the two ROC plots compares the models by plotting all values from both.
compare = logistic_roc + forest_roc
../_images/90f94f7ae130b439fb6811f4e37275894a219abb1202f0c39899c621c177ab38.png

Classification report#

Added in sklearn-evaluation version 0.7.8

Decision tree classification report#

# Classification report plot for the decision tree, built from the true test
# labels and the tree's predictions.
tree_cr = plot.ClassificationReport.from_raw_data(y_test, tree_pred)
../_images/02c1a9ba46a1000f7e1ab8225a7a389ca27379c3441506793eac27ae07ef4142.png

Random forest classification report#

# Classification report plot for the random forest, built from the true test
# labels and the forest's predictions.
forest_cr = plot.ClassificationReport.from_raw_data(y_test, forest_pred)
../_images/9f55b99872afe9ecefbfb2d25f214ded5bb22dd2eef67156eb3b349c56866fd0.png

Compare classification reports#

# Adding the two reports compares the models by plotting all values from both.
compare = tree_cr + forest_cr
../_images/b04ee571543cf1be3432d56a2e2ac788c1ba23949a9e1b131544b42bca12d443.png
# Subtracting reports compares performance: random forest minus decision tree.
diff = forest_cr - tree_cr
../_images/76ea2818348918135771aa648693a8ea4c6dd2dc75cd643e8224b7c80975c6e7.png

Precision Recall Curve#

# Fit a decision tree, then a random forest, on the training split and keep
# each model's predict_proba output for the test split.
tree_score = DecisionTreeClassifier().fit(X_train, y_train).predict_proba(X_test)
forest_score = RandomForestClassifier().fit(X_train, y_train).predict_proba(X_test)

Decision tree PR#

# Precision-recall plot for the decision tree, with three explicit curve labels.
tree_pr = plot.PrecisionRecall.from_raw_data(
    y_test,
    tree_score,
    label=[
        "Decision Tree Class 1",
        "Decision Tree Class 2",
        "Decision Tree Class 3",
    ],
)
../_images/0280bf705045a00f0124a3ba80ed5669b76de714dfa6e7673cc91d46bc07f091.png

Random forest PR#

# Precision-recall plot for the random forest, with three explicit curve labels.
forest_pr = plot.PrecisionRecall.from_raw_data(
    y_test,
    forest_score,
    label=[
        "Random Forest Class 1",
        "Random Forest Class 2",
        "Random Forest Class 3",
    ],
)
../_images/17ed91a4f910d837df8036bd593c06006130dbd6e3c32e4bbe359da6fc73cc2a.png

Compare PR#

# Adding the two PR plots compares the models by plotting all values from both.
compare = tree_pr + forest_pr
../_images/cca51a2bdc42b0277dd4822ed5504ae243c7235e0d5602ac2506684a6a2ec24e.png