Experiment tracking#

Important: SQLiteTracker received a major upgrade in version 0.8.2, so make sure you are running that version or higher.

SQLiteTracker provides a powerful and flexible way to track computational experiments (e.g., Machine Learning) using a SQLite database. It lets you use SQL as the query language, giving you a powerful tool for comparing experiments, and it comes with plotting features to compare plots side-by-side and to combine plots for easier comparison.

Read more about the motivation in our blog post, or check out the HN discussion.

This tutorial walks you through the features with a Machine Learning use case; however, the tracker is generic enough to be used in any other domain.

from pathlib import Path

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, RocCurveDisplay
# delete our example database, if any
db = Path("my_experiments.db")

if db.exists():
    db.unlink()
from sklearn_evaluation import SQLiteTracker

tracker = SQLiteTracker("my_experiments.db")
X, y = datasets.make_classification(
    n_samples=200, n_features=10, n_informative=5, class_sep=0.65
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

models = [RandomForestClassifier(), LogisticRegression(), DecisionTreeClassifier()]

Training and logging models#

for m in models:
    model = type(m).__name__
    print(f"Fitting {model}")

    experiment = tracker.new_experiment()
    m.fit(X_train, y_train)
    y_pred = m.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # log a dictionary with log_dict
    experiment.log_dict({"accuracy": acc, "model": model, **m.get_params()})
Fitting RandomForestClassifier
Fitting LogisticRegression
Fitting DecisionTreeClassifier

Or use .log(key, value) to log individual values:

svc = SVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
acc = accuracy_score(y_test, y_pred)

experiment = tracker.new_experiment()

# log individual values
experiment.log("accuracy", acc)
experiment.log("model", type(svc).__name__)

_ = experiment.log_dict(svc.get_params())
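Since .log_dict() accepts any JSON-serializable dictionary, you are not limited to scikit-learn models. As a minimal sketch, suppose you wanted to track a hypothetical simulation in a separate database (simulations.db is a made-up file name):

# a separate tracker, purely illustrative
other_tracker = SQLiteTracker("simulations.db")

experiment = other_tracker.new_experiment()

# any JSON-serializable values work, not just model parameters
experiment.log_dict({"n_iterations": 1000, "seed": 42, "method": "monte-carlo"})
experiment.log("runtime_seconds", 12.3)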

Displaying latest experiments#

Display the tracker object to show the latest experiments:

tracker

SQLiteTracker

uuid      created              parameters  comment
9342a30c  2023-03-20 14:24:28  {"accuracy": 0.6060606060606061, "model": "DecisionTreeClassifier", "ccp_alpha": 0.0, "class_weight": null, "criterion": "gini", "max_depth": null, "max_features": null, "max_leaf_nodes": null, "min_impurity_decrease": 0.0, "min_samples_leaf": 1, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "random_state": null, "splitter": "best"}
bee3b09a  2023-03-20 14:24:28  {"accuracy": 0.696969696969697, "model": "SVC", "C": 1.0, "break_ties": false, "cache_size": 200, "class_weight": null, "coef0": 0.0, "decision_function_shape": "ovr", "degree": 3, "gamma": "scale", "kernel": "rbf", "max_iter": -1, "probability": false, "random_state": null, "shrinking": true, "tol": 0.001, "verbose": false}
651ee13d  2023-03-20 14:24:27  {"accuracy": 0.6818181818181818, "model": "RandomForestClassifier", "bootstrap": true, "ccp_alpha": 0.0, "class_weight": null, "criterion": "gini", "max_depth": null, "max_features": "sqrt", "max_leaf_nodes": null, "max_samples": null, "min_impurity_decrease": 0.0, "min_samples_leaf": 1, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "n_estimators": 100, "n_jobs": null, "oob_score": false, "random_state": null, "verbose": 0, "warm_start": false}
b032c3ef  2023-03-20 14:24:27  {"accuracy": 0.6212121212121212, "model": "LogisticRegression", "C": 1.0, "class_weight": null, "dual": false, "fit_intercept": true, "intercept_scaling": 1, "l1_ratio": null, "max_iter": 100, "multi_class": "auto", "n_jobs": null, "penalty": "l2", "random_state": null, "solver": "lbfgs", "tol": 0.0001, "verbose": 0, "warm_start": false}

(Most recent experiments)

Querying experiments with SQL using .query()#

You can use SQL to query your experiments. To see what’s been logged, use .get_parameters_keys():

keys = tracker.get_parameters_keys()
# show first 5 keys
keys[:5]
['C', 'accuracy', 'bootstrap', 'break_ties', 'cache_size']

To generate a sample query, use .get_sample_query():

print(tracker.get_sample_query())
SELECT
    uuid,
    json_extract(parameters, '$.C') as C,
    json_extract(parameters, '$.accuracy') as accuracy,
    json_extract(parameters, '$.bootstrap') as bootstrap,
    json_extract(parameters, '$.break_ties') as break_ties,
    json_extract(parameters, '$.cache_size') as cache_size,
    json_extract(parameters, '$.ccp_alpha') as ccp_alpha,
    json_extract(parameters, '$.class_weight') as class_weight,
    json_extract(parameters, '$.coef0') as coef0,
    json_extract(parameters, '$.criterion') as criterion,
    json_extract(parameters, '$.decision_function_shape') as decision_function_shape,
    json_extract(parameters, '$.degree') as degree,
    json_extract(parameters, '$.dual') as dual,
    json_extract(parameters, '$.fit_intercept') as fit_intercept,
    json_extract(parameters, '$.gamma') as gamma,
    json_extract(parameters, '$.intercept_scaling') as intercept_scaling,
    json_extract(parameters, '$.kernel') as kernel,
    json_extract(parameters, '$.l1_ratio') as l1_ratio,
    json_extract(parameters, '$.max_depth') as max_depth,
    json_extract(parameters, '$.max_features') as max_features,
    json_extract(parameters, '$.max_iter') as max_iter,
    json_extract(parameters, '$.max_leaf_nodes') as max_leaf_nodes,
    json_extract(parameters, '$.max_samples') as max_samples,
    json_extract(parameters, '$.min_impurity_decrease') as min_impurity_decrease,
    json_extract(parameters, '$.min_samples_leaf') as min_samples_leaf,
    json_extract(parameters, '$.min_samples_split') as min_samples_split,
    json_extract(parameters, '$.min_weight_fraction_leaf') as min_weight_fraction_leaf,
    json_extract(parameters, '$.model') as model,
    json_extract(parameters, '$.multi_class') as multi_class,
    json_extract(parameters, '$.n_estimators') as n_estimators,
    json_extract(parameters, '$.n_jobs') as n_jobs,
    json_extract(parameters, '$.oob_score') as oob_score,
    json_extract(parameters, '$.penalty') as penalty,
    json_extract(parameters, '$.probability') as probability,
    json_extract(parameters, '$.random_state') as random_state,
    json_extract(parameters, '$.shrinking') as shrinking,
    json_extract(parameters, '$.solver') as solver,
    json_extract(parameters, '$.splitter') as splitter,
    json_extract(parameters, '$.tol') as tol,
    json_extract(parameters, '$.verbose') as verbose,
    json_extract(parameters, '$.warm_start') as warm_start
    FROM experiments
LIMIT 10

To execute a query, use .query():

ordered = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
ORDER BY accuracy DESC
"""
)
ordered
model accuracy
uuid
bee3b09a SVC 0.696970
651ee13d RandomForestClassifier 0.681818
b032c3ef LogisticRegression 0.621212
9342a30c DecisionTreeClassifier 0.606061

The query method returns a data frame with “uuid” as the index:

type(ordered)
pandas.core.frame.DataFrame
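Since the result is a regular pandas.DataFrame, you can post-process it with the usual pandas API. For example, a quick sketch that grabs the uuid of the best experiment from the frame above:

# "uuid" is the index, so idxmax returns the best experiment's uuid
best_uuid = ordered["accuracy"].idxmax()
best_uuid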

Storing plots#

You can log a confusion matrix, a classification report, and generic matplotlib figures:

%%capture


def fit(model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    experiment = tracker.new_experiment()
    experiment.log_dict(
        {"accuracy": acc, "model": type(model).__name__, **model.get_params()}
    )

    # log plots
    experiment.log_confusion_matrix(y_test, y_pred)
    experiment.log_classification_report(y_test, y_pred)

    # log generic matplotlib figure
    roc = RocCurveDisplay.from_estimator(model, X_test, y_test)
    experiment.log_figure("roc", roc.figure_)


fit(model=RandomForestClassifier(n_estimators=100))
fit(model=RandomForestClassifier(n_estimators=10))
tracker.recent(2)
created parameters comment
uuid
38448728 2023-03-20 14:24:30 {"accuracy": 0.6212121212121212, "model": "Ran... None
da902cd9 2023-03-20 14:24:29 {"accuracy": 0.6818181818181818, "model": "Ran... None

Rendering plots in table view#

The .query() method also allows rendering plots in the table view:

results = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy,
       json_extract(parameters, '$.confusion_matrix') AS cm,
       json_extract(parameters, '$.roc') AS roc
FROM experiments
WHERE accuracy IS NOT NULL
AND cm IS NOT NULL
AND roc IS NOT NULL
ORDER BY created DESC
LIMIT 2
""",
    as_frame=False,
    render_plots=True,
)

results
uuid      model                   accuracy  cm                       roc
38448728  RandomForestClassifier  0.621212  (confusion matrix plot)  (ROC curve plot)
da902cd9  RandomForestClassifier  0.681818  (confusion matrix plot)  (ROC curve plot)

Side-by-side comparison#

From the .query() results, you can extract a given column for a side-by-side comparison:

results.get("cm")

You can change the labels in the tabs with the index_by argument:

results.get("cm", index_by="accuracy")

Combining plots#

With a side-by-side comparison, it might be hard to spot differences in model performance. You can retrieve individual experiments, extract their plots, and combine them:

# get the uuids for the latest 2 experiments
uuid1, uuid2 = results.get("uuid")

# get the experiments
one = tracker.get(uuid1)
another = tracker.get(uuid2)

Combine statistics from both confusion matrices:

one["confusion_matrix"] + another["confusion_matrix"]
<sklearn_evaluation.plot.classification.ConfusionMatrixAdd at 0x7f9158f06b00>

Get confusion matrix differences:

one["confusion_matrix"] - another["confusion_matrix"]
<sklearn_evaluation.plot.classification.ConfusionMatrixSub at 0x7f9158762740>

Combine classification reports:

one["classification_report"] + another["classification_report"]
<sklearn_evaluation.plot.classification_report.ClassificationReportAdd at 0x7f915a3679d0>

You can also retrieve generic figures (logged with log_figure):

one["roc"]

However, note that plot combination (plot1 + plot2 and plot1 - plot2) is only supported for plots logged via the experiment.log_* methods, not for plots logged with the generic experiment.log_figure method.
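If you need to compare figures logged with log_figure, one workaround is to re-draw them on a shared matplotlib axes. A minimal sketch using scikit-learn's RocCurveDisplay, assuming the two fitted estimators (here the hypothetical model_a and model_b) are still in memory:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# overlay both ROC curves on the same axes for a direct comparison
RocCurveDisplay.from_estimator(model_a, X_test, y_test, ax=ax, name="model A")
RocCurveDisplay.from_estimator(model_b, X_test, y_test, ax=ax, name="model B")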

Adding comments#

Use the .comment() method to attach a note to an experiment:

one.comment("This is some comment")

You can then filter for commented experiments with SQL:

tracker.query(
    """
SELECT uuid,
       comment,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
WHERE comment is not NULL
"""
)
comment model accuracy
uuid
38448728 This is some comment RandomForestClassifier 0.621212

Pandas integration#

Getting recent experiments#

The .recent() method also returns a data frame:

df = tracker.recent()
df
created parameters comment
uuid
38448728 2023-03-20 14:24:30 {"accuracy": 0.6212121212121212, "model": "Ran... This is some comment
da902cd9 2023-03-20 14:24:29 {"accuracy": 0.6818181818181818, "model": "Ran... None
9342a30c 2023-03-20 14:24:28 {"accuracy": 0.6060606060606061, "model": "Dec... None
bee3b09a 2023-03-20 14:24:28 {"accuracy": 0.696969696969697, "model": "SVC"... None
651ee13d 2023-03-20 14:24:27 {"accuracy": 0.6818181818181818, "model": "Ran... None

Pass normalize=True to convert the nested JSON dictionary into columns:

df = tracker.recent(normalize=True)
df
created accuracy model bootstrap ccp_alpha class_weight criterion max_depth max_features max_leaf_nodes ... coef0 decision_function_shape degree gamma kernel max_iter probability shrinking tol comment
uuid
38448728 2023-03-20 14:24:30 0.621212 RandomForestClassifier True 0.0 None gini NaN sqrt NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN This is some comment
da902cd9 2023-03-20 14:24:29 0.681818 RandomForestClassifier True 0.0 None gini NaN sqrt NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None
9342a30c 2023-03-20 14:24:28 0.606061 DecisionTreeClassifier NaN 0.0 None gini NaN None NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None
bee3b09a 2023-03-20 14:24:28 0.696970 SVC NaN NaN None NaN NaN NaN NaN ... 0.0 ovr 3.0 scale rbf -1.0 False True 0.001 None
651ee13d 2023-03-20 14:24:27 0.681818 RandomForestClassifier True 0.0 None gini NaN sqrt NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN None

5 rows × 46 columns
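With the normalized frame, regular pandas operations become straightforward. For example, a sketch summarizing accuracy per model type using the frame above:

# mean and best accuracy per model type
df.groupby("model")["accuracy"].agg(["mean", "max"])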

Querying#

You can also use the .query() method with as_frame=True (the default) to get a pandas.DataFrame:

df = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
ORDER BY accuracy DESC
LIMIT 3
"""
)
df
model accuracy
uuid
bee3b09a SVC 0.696970
651ee13d RandomForestClassifier 0.681818
da902cd9 RandomForestClassifier 0.681818
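SQL also makes it easy to filter experiments. For instance, a sketch selecting only experiments above an accuracy threshold:

tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
WHERE json_extract(parameters, '$.accuracy') > 0.65
ORDER BY accuracy DESC
"""
)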