Real-time tracking#
SQLiteTracker provides a powerful and flexible way to track computational (e.g., Machine Learning) experiments using an SQLite database.
This tutorial demonstrates training a small network on the Fashion MNIST dataset and tracking the training and validation metrics in real time. We will also see how to query the tracked metrics while training is ongoing and plot them against the epoch number.
Create the experiment tracker#
Show code cell content
from pathlib import Path

from sklearn_evaluation import SQLiteTracker

# Start from a clean slate: delete any database file left over from a
# previous run before the tracker is opened on the same path.
db = Path("nn_experiments.db")
if db.exists():
    db.unlink()

# Open the tracker on that same path (single source of truth for the file
# name) and register a new experiment.
tracker = SQLiteTracker(str(db))
experiment = tracker.new_experiment()

# Keep the experiment identifier; the training callback uses it to log metrics.
uuid = experiment.uuid
Fashion MNIST Dataset#
Fashion-MNIST is a dataset of Zalando’s article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.
import tensorflow as tf

# Fetch Fashion-MNIST through Keras. load_data() yields the training split and
# the test split, each as an (images, labels) pair.
fashion_mnist = tf.keras.datasets.fashion_mnist
training_split, test_split = fashion_mnist.load_data()
train_images, train_labels = training_split
test_images, test_labels = test_split
Show code cell output
2023-04-11 16:51:39.955653: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
8192/29515 [=======>......................] - ETA: 0s
29515/29515 [==============================] - 0s 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
8192/26421880 [..............................] - ETA: 0s
131072/26421880 [..............................] - ETA: 10s
1024000/26421880 [>.............................] - ETA: 2s
5128192/26421880 [====>.........................] - ETA: 0s
8396800/26421880 [========>.....................] - ETA: 0s
12599296/26421880 [=============>................] - ETA: 0s
16785408/26421880 [==================>...........] - ETA: 0s
21209088/26421880 [=======================>......] - ETA: 0s
25829376/26421880 [============================>.] - ETA: 0s
26421880/26421880 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
5148/5148 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
8192/4422102 [..............................] - ETA: 0s
131072/4422102 [..............................] - ETA: 1s
1015808/4422102 [=====>........................] - ETA: 0s
4422102/4422102 [==============================] - 0s 0us/step
from sklearn.model_selection import train_test_split

# Hold out 20% of the training data as a validation set.
split = train_test_split(train_images, train_labels, test_size=0.2)
train_images, val_images, train_labels, val_labels = split
import matplotlib.pyplot as plt

# Display the first training image with a colour scale; raw pixel values fall
# in the range 0-255.
preview_fig, preview_ax = plt.subplots()
preview_image = preview_ax.imshow(train_images[0])
preview_fig.colorbar(preview_image, ax=preview_ax)
preview_ax.grid(False)
plt.show()

# Keep only the first 500 samples of each split (presumably to keep the
# tutorial fast) and rescale the image pixels from 0-255 to the range [0, 1].
train_images = train_images[:500] / 255.0
train_labels = train_labels[:500]
val_images = val_images[:500] / 255.0
val_labels = val_labels[:500]
Train the model#
# Per-epoch metric histories, filled in by the training callback. Each name is
# bound to its own, initially empty, list.
loss, val_loss, accuracy, val_accuracy = [], [], [], []
Define a callback that tracks the metrics during training and logs them to the experiment tracker.
class TrackLossandAccuracyCallback(tf.keras.callbacks.Callback):
    """Keras callback that records per-epoch metrics and logs them to the tracker.

    At the end of every epoch the training/validation loss and accuracy are
    appended to the module-level ``loss``, ``val_loss``, ``accuracy`` and
    ``val_accuracy`` lists, and the values for that epoch are appended to the
    experiment identified by ``uuid`` through ``tracker.upsert_append``.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Store and log the metrics reported for the epoch that just ended.

        Parameters
        ----------
        epoch : int
            Epoch index supplied by Keras (not used here).
        logs : dict, optional
            Metrics for the epoch; expected to contain ``loss``, ``val_loss``,
            ``accuracy`` and ``val_accuracy``.

        Raises
        ------
        KeyError
            If ``logs`` is non-empty but lacks one of the expected metrics
            (e.g. training without validation data).
        """
        if not logs:
            # Nothing was reported for this epoch, so there is nothing to log.
            return

        # Plain floats keep the values JSON-serialisable for the tracker.
        latest = {
            "loss": float(logs["loss"]),
            "accuracy": float(logs["accuracy"]),
            "val_loss": float(logs["val_loss"]),
            "val_accuracy": float(logs["val_accuracy"]),
        }

        # Keep the in-memory histories up to date.
        loss.append(latest["loss"])
        val_loss.append(latest["val_loss"])
        accuracy.append(latest["accuracy"])
        val_accuracy.append(latest["val_accuracy"])

        # upsert_append adds to what is already stored, so send only this
        # epoch's values. Sending the full history lists each epoch would
        # store nested, duplicated histories instead of one value per epoch.
        tracker.upsert_append(uuid, latest)
# Network layers, in order:
#   1. flatten each 28x28 input into a vector,
#   2. a 128-unit fully connected layer with ReLU activation,
#   3. a 10-unit output layer with no activation.
network_layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(10),
]
model = tf.keras.Sequential(network_layers)
2023-04-11 16:51:43.225216: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
# The loss is configured with from_logits=True, i.e. it is given the model's
# raw outputs rather than probabilities.
crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=crossentropy, metrics=["accuracy"])
# Train for a fixed number of epochs, evaluating on the validation set each
# epoch and letting the callback log the metrics to the tracker.
epoch_count = 10
fit_options = {
    "validation_data": (val_images, val_labels),
    "epochs": epoch_count,
    "verbose": 0,
    "callbacks": [TrackLossandAccuracyCallback()],
}
history = model.fit(train_images, train_labels, **fit_options)
Track metrics while training#
While training is ongoing, you can plot the metrics by opening another terminal or notebook and running the steps below.
Query the experiment with SQL:
# Pull each tracked metric out of the JSON `parameters` column, one output
# column per metric, for every row of the experiments table.
metrics_query = """
SELECT
uuid,
json_extract(parameters, '$.accuracy') as accuracy,
json_extract(parameters, '$.loss') as loss,
json_extract(parameters, '$.val_accuracy') as val_accuracy,
json_extract(parameters, '$.val_loss') as val_loss
FROM experiments
"""
results = tracker.query(metrics_query)
Extract and plot the relevant metrics against epochs:
import json


def _first_row_series(column):
    """Decode the JSON-encoded value stored in the first row of ``column``."""
    return json.loads(results[column].to_list()[0])


training_accuracy = _first_row_series("accuracy")
val_accuracy = _first_row_series("val_accuracy")
training_loss = _first_row_series("loss")
val_loss = _first_row_series("val_loss")

# Epochs are numbered from 1 on the x axis.
epoch_range = range(1, len(training_accuracy) + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# One panel per metric: dashed line for training, solid line for validation.
panels = (
    (ax1, "Loss", "#725BD0", training_loss, val_loss),
    (ax2, "Accuracy", "#BA2932", training_accuracy, val_accuracy),
)
for axis, y_label, colour, train_values, validation_values in panels:
    axis.plot(epoch_range, train_values, color=colour, linestyle="--", label="Train")
    axis.plot(
        epoch_range, validation_values, color=colour, linestyle="-", label="Validation"
    )
    axis.set_xlabel("Epoch")
    axis.set_ylabel(y_label)
    axis.legend(loc="best")
    axis.grid()
