Comparison

Comparison#

Learn how to easily compare plots from different models.

Compare two models by plotting all values: plot1 + plot2
Compare the difference in performance between two models: plot2 - plot1

Confusion matrix#

Added in sklearn-evaluation version 0.7.2

import matplotlib
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn_evaluation import plot

# Set the default figure size and font size used by the plots below.
matplotlib.rcParams.update({"figure.figsize": (7, 7), "font.size": 18})

# Generate a synthetic 3-class dataset: 1000 samples, 20 features, 10 of
# them informative. The seed makes the generated data the same on every run.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    class_sep=0.80,
    n_classes=3,
    random_state=0,
)

# Hold out 20% of the samples for testing. The split is seeded as well, so
# the train/test partition (like the dataset) does not change between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)


# Fit a decision tree and a random forest on the training split and keep
# each model's predicted labels for the test split (the confusion matrices
# are built from these predictions further down).
tree_pred = DecisionTreeClassifier().fit(X_train, y_train).predict(X_test)
forest_pred = RandomForestClassifier().fit(X_train, y_train).predict(X_test)

Decision tree confusion matrix#

# Confusion matrix for the decision tree, built from the true and predicted
# test labels.
tree_cm = plot.ConfusionMatrix.from_raw_data(y_test, tree_pred, normalize=False)

../_images/959b02ace08c6aeafe69cdff506d77ac320d80978249c7330326e3994a076cc3.png

Random forest confusion matrix#

# Confusion matrix for the random forest, built from the true and predicted
# test labels.
forest_cm = plot.ConfusionMatrix.from_raw_data(y_test, forest_pred, normalize=False)

../_images/2a5d3d6d24f88f3184a7d24f8d333dfb75a8a6ec8d0cfcfa9cf2512f970347c4.png

Compare confusion matrices#

# Adding two confusion matrices plots the values of both models together.
compare = tree_cm + forest_cm

../_images/f881863a17e3f441b0ab4850e8bd2c66b89e3e6cd7f45f78b0b4750dab714dc2.png

# Subtracting compares performance: random forest minus decision tree.
diff = forest_cm - tree_cm

../_images/ce5d6756c1c10dce7bddb9d997e492417ac4b49681a177c65671b8e97b21e9d8.png

ROC#

# Fit logistic regression and a random forest on the training split and keep
# each model's predicted class probabilities for the test split.
logistic_score = LogisticRegression().fit(X_train, y_train).predict_proba(X_test)
forest_score = RandomForestClassifier().fit(X_train, y_train).predict_proba(X_test)

Logistic regression ROC#

# ROC plot for logistic regression, built from the true test labels and the
# model's predicted class probabilities.
logistic_roc = plot.ROC.from_raw_data(y_test, logistic_score)

../_images/2cad4125b412b205d9d465e8d220d03d4351b27e540a2daf7492329ddf6deba8.png

Random forest ROC#

# ROC plot for the random forest, built from the true test labels and the
# model's predicted class probabilities.
forest_roc = plot.ROC.from_raw_data(y_test, forest_score)

../_images/2d774313b009aa6762a86fb908b6cbbf09bd17989939322f450075d1e5146a64.png

Compare ROC#

# Adding two ROC plots draws both models' curves together.
compare = logistic_roc + forest_roc

../_images/c78b60984089887238dbe8d91c270ae0b9d1870d341ef45d9ce5398a0ad965cf.png

Classification report#

Added in sklearn-evaluation version 0.7.8

Decision tree classification report#

# Classification report for the decision tree, built from the true and
# predicted test labels.
tree_cr = plot.ClassificationReport.from_raw_data(y_test, tree_pred)

../_images/ea4c746f3e6baebbad1189a339b07fe3811d1dc7fbfa51697428edf48b0035cc.png

Random forest classification report#

# Classification report for the random forest, built from the true and
# predicted test labels.
forest_cr = plot.ClassificationReport.from_raw_data(y_test, forest_pred)

../_images/092e9c88f56032796e1f2e17be1550577303b7b86c721821afda59b276825985.png

Compare classification reports#

# Adding two classification reports plots the values of both models together.
compare = tree_cr + forest_cr

../_images/1b0cc86e6dfc0c98a74ac82c400c358d9a936257ae9f49cdb22c3fb8edad7c6f.png

# Subtracting compares performance: random forest minus decision tree.
diff = forest_cr - tree_cr

../_images/0e305f8d0d631ea87dc3c08c314c6504f39feafc1de65f62270e1ff4b28d1232.png

Precision Recall Curve#

# Fit a decision tree and a random forest on the training split and keep
# each model's predicted class probabilities for the test split.
# NOTE: forest_score is rebound here, replacing the value computed in the
# ROC section with probabilities from a newly fitted forest.
tree_score = DecisionTreeClassifier().fit(X_train, y_train).predict_proba(X_test)
forest_score = RandomForestClassifier().fit(X_train, y_train).predict_proba(X_test)

Decision tree PR#

# Precision-recall plot for the decision tree, with one label per class.
tree_pr = plot.PrecisionRecall.from_raw_data(
    y_test,
    tree_score,
    label=[f"Decision Tree Class {k}" for k in (1, 2, 3)],
)

../_images/52f2af18c2002de40a783e009261762f071bd32feb95a5f304b7712af5f2e3f6.png

Random forest PR#

# Precision-recall plot for the random forest, with one label per class.
forest_pr = plot.PrecisionRecall.from_raw_data(
    y_test,
    forest_score,
    label=[f"Random Forest Class {k}" for k in (1, 2, 3)],
)

../_images/23b38246113a1d195c33afd117864402c06553c3c368645d67abe90330b8e401.png

Compare PR#

# Adding two precision-recall plots draws both models' curves together.
compare = tree_pr + forest_pr

../_images/85159ad9711f153ee344d341435e4a09beae7ea19f8bec2e1d077d8a6ea0f105.png

Comparison

Contents

Comparison#

Confusion matrix#

Decision tree confusion matrix#

Random forest confusion matrix#

Compare confusion matrices#

ROC#

Logistic regression ROC#

Random forest ROC#

Compare ROC#

Classification report#

Decision tree classification report#

Random forest classification report#

Compare classification reports#

Precision Recall Curve#

Decision tree PR#

Random forest PR#

Compare PR#