Experiment tracking#
Important: SQLiteTracker got a big upgrade in version 0.8.2, so make sure you are running that version or higher.
SQLiteTracker provides a powerful and flexible way to track computational (e.g., Machine Learning) experiments using a SQLite database. It lets you use SQL as the query language, giving you a powerful tool for comparing experiments, and it comes with plotting features to compare plots side-by-side and to combine plots for easier comparison.
Read more about the motivation in our blog post, and check out the HN discussion.
This tutorial walks you through the features with a Machine Learning use case; however, the tracker is generic enough to be used in other domains.
from pathlib import Path
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, RocCurveDisplay
# delete our example database, if any
db = Path("my_experiments.db")
if db.exists():
    db.unlink()
from sklearn_evaluation import SQLiteTracker
tracker = SQLiteTracker("my_experiments.db")
X, y = datasets.make_classification(200, 10, n_informative=5, class_sep=0.65)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
models = [RandomForestClassifier(), LogisticRegression(), DecisionTreeClassifier()]
Training and logging models#
for m in models:
    model = type(m).__name__
    print(f"Fitting {model}")

    experiment = tracker.new_experiment()
    m.fit(X_train, y_train)
    y_pred = m.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # log a dictionary with log_dict
    experiment.log_dict({"accuracy": acc, "model": model, **m.get_params()})
Fitting RandomForestClassifier
Fitting LogisticRegression
Fitting DecisionTreeClassifier
Or use .log(key, value) to log individual values:
svc = SVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
acc = accuracy_score(y_test, y_pred)
experiment = tracker.new_experiment()
# log individual values
experiment.log("accuracy", acc)
experiment.log("model", type(svc).__name__)
_ = experiment.log_dict(svc.get_params())
Displaying latest experiments#
Display the tracker object to show the most recent experiments:
tracker
SQLiteTracker
uuid | created | parameters | comment |
---|---|---|---|
747fa9d4 | 2023-04-11 16:51:24 | {"accuracy": 0.696969696969697, "model": "DecisionTreeClassifier", "ccp_alpha": 0.0, "class_weight": null, "criterion": "gini", "max_depth": null, "max_features": null, "max_leaf_nodes": null, "min_impurity_decrease": 0.0, "min_samples_leaf": 1, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "random_state": null, "splitter": "best"} | |
68edf4bf | 2023-04-11 16:51:24 | {"accuracy": 0.7878787878787878, "model": "SVC", "C": 1.0, "break_ties": false, "cache_size": 200, "class_weight": null, "coef0": 0.0, "decision_function_shape": "ovr", "degree": 3, "gamma": "scale", "kernel": "rbf", "max_iter": -1, "probability": false, "random_state": null, "shrinking": true, "tol": 0.001, "verbose": false} | |
c5d9d833 | 2023-04-11 16:51:23 | {"accuracy": 0.7727272727272727, "model": "RandomForestClassifier", "bootstrap": true, "ccp_alpha": 0.0, "class_weight": null, "criterion": "gini", "max_depth": null, "max_features": "sqrt", "max_leaf_nodes": null, "max_samples": null, "min_impurity_decrease": 0.0, "min_samples_leaf": 1, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "n_estimators": 100, "n_jobs": null, "oob_score": false, "random_state": null, "verbose": 0, "warm_start": false} | |
e2d541ae | 2023-04-11 16:51:23 | {"accuracy": 0.6818181818181818, "model": "LogisticRegression", "C": 1.0, "class_weight": null, "dual": false, "fit_intercept": true, "intercept_scaling": 1, "l1_ratio": null, "max_iter": 100, "multi_class": "auto", "n_jobs": null, "penalty": "l2", "random_state": null, "solver": "lbfgs", "tol": 0.0001, "verbose": 0, "warm_start": false} |
(Most recent experiments)
Querying experiments with SQL using .query()#
You can use SQL to query your experiments. To see what’s been logged, use get_parameters_keys():
keys = tracker.get_parameters_keys()
# show first 5 keys
keys[:5]
['C', 'accuracy', 'bootstrap', 'break_ties', 'cache_size']
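These keys can also help you assemble queries programmatically. Here is a minimal sketch (the selected subset below is arbitrary, chosen just for illustration):
# build json_extract clauses for a subset of the logged keys
selected = ["model", "accuracy"]
clauses = ",\n       ".join(
    f"json_extract(parameters, '$.{k}') AS {k}" for k in selected
)
print(f"SELECT uuid,\n       {clauses}\nFROM experiments")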
To generate a sample query, use .get_sample_query():
print(tracker.get_sample_query())
SELECT
uuid,
json_extract(parameters, '$.C') as C,
json_extract(parameters, '$.accuracy') as accuracy,
json_extract(parameters, '$.bootstrap') as bootstrap,
json_extract(parameters, '$.break_ties') as break_ties,
json_extract(parameters, '$.cache_size') as cache_size,
json_extract(parameters, '$.ccp_alpha') as ccp_alpha,
json_extract(parameters, '$.class_weight') as class_weight,
json_extract(parameters, '$.coef0') as coef0,
json_extract(parameters, '$.criterion') as criterion,
json_extract(parameters, '$.decision_function_shape') as decision_function_shape,
json_extract(parameters, '$.degree') as degree,
json_extract(parameters, '$.dual') as dual,
json_extract(parameters, '$.fit_intercept') as fit_intercept,
json_extract(parameters, '$.gamma') as gamma,
json_extract(parameters, '$.intercept_scaling') as intercept_scaling,
json_extract(parameters, '$.kernel') as kernel,
json_extract(parameters, '$.l1_ratio') as l1_ratio,
json_extract(parameters, '$.max_depth') as max_depth,
json_extract(parameters, '$.max_features') as max_features,
json_extract(parameters, '$.max_iter') as max_iter,
json_extract(parameters, '$.max_leaf_nodes') as max_leaf_nodes,
json_extract(parameters, '$.max_samples') as max_samples,
json_extract(parameters, '$.min_impurity_decrease') as min_impurity_decrease,
json_extract(parameters, '$.min_samples_leaf') as min_samples_leaf,
json_extract(parameters, '$.min_samples_split') as min_samples_split,
json_extract(parameters, '$.min_weight_fraction_leaf') as min_weight_fraction_leaf,
json_extract(parameters, '$.model') as model,
json_extract(parameters, '$.multi_class') as multi_class,
json_extract(parameters, '$.n_estimators') as n_estimators,
json_extract(parameters, '$.n_jobs') as n_jobs,
json_extract(parameters, '$.oob_score') as oob_score,
json_extract(parameters, '$.penalty') as penalty,
json_extract(parameters, '$.probability') as probability,
json_extract(parameters, '$.random_state') as random_state,
json_extract(parameters, '$.shrinking') as shrinking,
json_extract(parameters, '$.solver') as solver,
json_extract(parameters, '$.splitter') as splitter,
json_extract(parameters, '$.tol') as tol,
json_extract(parameters, '$.verbose') as verbose,
json_extract(parameters, '$.warm_start') as warm_start
FROM experiments
LIMIT 10
To execute a query, use .query():
ordered = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
ORDER BY accuracy DESC
"""
)
ordered
uuid | model | accuracy
---|---|---
68edf4bf | SVC | 0.787879
c5d9d833 | RandomForestClassifier | 0.772727
747fa9d4 | DecisionTreeClassifier | 0.696970
e2d541ae | LogisticRegression | 0.681818
The .query() method returns a data frame with “uuid” as the index:
type(ordered)
pandas.core.frame.DataFrame
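Because it is a regular pandas.DataFrame, any pandas operation works on it; for example (a small illustrative snippet using the ordered frame from above):
# uuid is the index, so idxmax returns the uuid of the
# best-performing experiment
best_uuid = ordered["accuracy"].idxmax()
best_uuid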
Storing plots#
You can log a confusion matrix and classification reports:
%%capture

def fit(model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    experiment = tracker.new_experiment()
    experiment.log_dict(
        {"accuracy": acc, "model": type(model).__name__, **model.get_params()}
    )

    # log plots
    experiment.log_confusion_matrix(y_test, y_pred)
    experiment.log_classification_report(y_test, y_pred)

    # log generic matplotlib figure
    roc = RocCurveDisplay.from_estimator(model, X_test, y_test)
    experiment.log_figure("roc", roc.figure_)
fit(model=RandomForestClassifier(n_estimators=100))
fit(model=RandomForestClassifier(n_estimators=10))
tracker.recent(2)
uuid | created | parameters | comment
---|---|---|---
747fa9d4 | 2023-04-11 16:51:24 | {"accuracy": 0.696969696969697, "model": "Deci... | None
68edf4bf | 2023-04-11 16:51:24 | {"accuracy": 0.7878787878787878, "model": "SVC... | None
Rendering plots in table view#
The .query() method also allows rendering plots in the table view:
results = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy,
       json_extract(parameters, '$.confusion_matrix') AS cm,
       json_extract(parameters, '$.roc') AS roc
FROM experiments
WHERE accuracy IS NOT NULL
      AND cm IS NOT NULL
      AND roc IS NOT NULL
ORDER BY created DESC
LIMIT 2
""",
    as_frame=False,
    render_plots=True,
)
results
uuid | model | accuracy | cm | roc
---|---|---|---|---
4b2656e5 | RandomForestClassifier | 0.772727 | (confusion matrix plot) | (ROC curve plot)
be5f2c0c | RandomForestClassifier | 0.757576 | (confusion matrix plot) | (ROC curve plot)
Side-by-side comparison#
From the .query() results, you can extract a given column for a side-by-side comparison:
results.get("cm")
You can change the labels in the tabs with the index_by argument:
results.get("cm", index_by="accuracy")
Combining plots#
With a side-by-side comparison, it might be hard to spot differences in model performance. Instead, you can get individual experiments, extract their plots, and combine them:
# get the uuids for the latest 2 experiments
uuid1, uuid2 = results.get("uuid")
# get the experiments
one = tracker.get(uuid1)
another = tracker.get(uuid2)
Combine statistics from both confusion matrices:
one["confusion_matrix"] + another["confusion_matrix"]
<sklearn_evaluation.plot.classification.ConfusionMatrixAdd at 0x7f4b631a4d60>

Get confusion matrix differences:
one["confusion_matrix"] - another["confusion_matrix"]
<sklearn_evaluation.plot.classification.ConfusionMatrixSub at 0x7f4b619f9e10>

Combine classification reports:
one["classification_report"] + another["classification_report"]
<sklearn_evaluation.plot.classification_report.ClassificationReportAdd at 0x7f4b62dc2c80>

You can also retrieve generic figures (logged with log_figure):
one["roc"]
However, note that plot combination (plot1 + plot2 and plot1 - plot2) is only supported for plots logged via the experiment.log_* methods, not for figures logged with the generic experiment.log_figure method.
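For instance, attempting to combine the generic ROC figures raises an error (an illustrative sketch; the exact exception type is not guaranteed, so we catch broadly):
# combining plots logged with log_* works as shown above, but
# generic figures logged with log_figure do not support +/-
try:
    one["roc"] + another["roc"]
except Exception as e:
    print(f"cannot combine generic figures: {e}")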
Adding comments#
one.comment("This is some comment")
tracker.query(
    """
SELECT uuid,
       comment,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
WHERE comment IS NOT NULL
"""
)
uuid | comment | model | accuracy
---|---|---|---
4b2656e5 | This is some comment | RandomForestClassifier | 0.772727
Appending experiment parameters#
Log initial metric_a values for the experiment:
one.log("metric_a", [0.2, 0.3])
tracker.get(one.uuid)["metric_a"]
[0.2, 0.3]
Append new “metric_a” values and add “metric_b” values:
tracker.upsert_append(one.uuid, {"metric_a": 0.4, "metric_b": [0.8, 0.9]})
df = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.metric_a') AS metric_a,
       json_extract(parameters, '$.metric_b') AS metric_b
FROM experiments
"""
)
df
uuid | metric_a | metric_b
---|---|---
c5d9d833 | None | None
e2d541ae | None | None
747fa9d4 | None | None
68edf4bf | None | None
4b2656e5 | [0.2,0.3,0.4] | [0.8,0.9]
be5f2c0c | None | None
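Since appended values accumulate as a list, this is handy for tracking a metric over training steps. A minimal sketch of plotting that history with matplotlib (assuming the experiment from above):
import matplotlib.pyplot as plt

# retrieve the accumulated history for one experiment
history = tracker.get(one.uuid)["metric_a"]  # [0.2, 0.3, 0.4]

plt.plot(history, marker="o")
plt.xlabel("step")
plt.ylabel("metric_a")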
Pandas integration#
Getting recent experiments#
The .recent() method also returns a data frame:
df = tracker.recent()
df
uuid | created | parameters | comment
---|---|---|---
747fa9d4 | 2023-04-11 16:51:24 | {"accuracy": 0.696969696969697, "model": "Deci... | None
68edf4bf | 2023-04-11 16:51:24 | {"accuracy": 0.7878787878787878, "model": "SVC... | None
4b2656e5 | 2023-04-11 16:51:24 | {"accuracy": 0.7727272727272727, "model": "Ran... | This is some comment
be5f2c0c | 2023-04-11 16:51:24 | {"accuracy": 0.7575757575757576, "model": "Ran... | None
c5d9d833 | 2023-04-11 16:51:23 | {"accuracy": 0.7727272727272727, "model": "Ran... | None
Pass normalize=True to convert the nested JSON dictionary into columns:
df = tracker.recent(normalize=True)
df
uuid | created | accuracy | model | ccp_alpha | class_weight | criterion | max_depth | max_features | max_leaf_nodes | min_impurity_decrease | ... | confusion_matrix.cm | confusion_matrix.normalize | confusion_matrix.target_names | confusion_matrix.version | classification_report.class | classification_report.matrix | classification_report.keys | classification_report.target_names | classification_report.version | comment
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
747fa9d4 | 2023-04-11 16:51:24 | 0.696970 | DecisionTreeClassifier | 0.0 | None | gini | NaN | None | NaN | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None |
68edf4bf | 2023-04-11 16:51:24 | 0.787879 | SVC | NaN | None | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None |
4b2656e5 | 2023-04-11 16:51:24 | 0.772727 | RandomForestClassifier | 0.0 | None | gini | NaN | sqrt | NaN | 0.0 | ... | [[26, 5], [10, 25]] | False | [Class 0, Class 1] | 0.12.1dev | sklearn_evaluation.plot.ClassificationReport | [[0.7222222222222222, 0.8387096774193549, 0.77... | [precision, recall, f1-score, support] | [0, 1] | 0.12.1dev | This is some comment |
be5f2c0c | 2023-04-11 16:51:24 | 0.757576 | RandomForestClassifier | 0.0 | None | gini | NaN | sqrt | NaN | 0.0 | ... | [[28, 3], [13, 22]] | False | [Class 0, Class 1] | 0.12.1dev | sklearn_evaluation.plot.ClassificationReport | [[0.6829268292682927, 0.9032258064516129, 0.77... | [precision, recall, f1-score, support] | [0, 1] | 0.12.1dev | None |
c5d9d833 | 2023-04-11 16:51:23 | 0.772727 | RandomForestClassifier | 0.0 | None | gini | NaN | sqrt | NaN | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | None |
5 rows × 48 columns
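Since the normalized output is a plain data frame, regular pandas indexing applies; for example, keeping only a couple of columns:
# select just the columns of interest from the normalized frame
df[["model", "accuracy"]].sort_values("accuracy", ascending=False)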
Querying#
You can also use the .query() method with as_frame=True (the default value) to get a pandas.DataFrame:
df = tracker.query(
    """
SELECT uuid,
       json_extract(parameters, '$.model') AS model,
       json_extract(parameters, '$.accuracy') AS accuracy
FROM experiments
ORDER BY accuracy DESC
LIMIT 3
"""
)
df
uuid | model | accuracy
---|---|---
68edf4bf | SVC | 0.787879
c5d9d833 | RandomForestClassifier | 0.772727
4b2656e5 | RandomForestClassifier | 0.772727
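From here, the frame plugs into the rest of the pandas ecosystem; for example, exporting the results (the filename below is hypothetical):
# persist the top experiments for sharing or further analysis
df.to_csv("top_experiments.csv")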