Querying notebooks with SQL#
Added in sklearn-evaluation version 0.6. Questions? Join our community!
NotebookDatabase
indexes outputs from a collection of notebooks in a SQLite database so you can query them. Any tagged cells will be captured and indexed by the database.
Requirements:
pip install sklearn-evaluation ploomber-engine
from pathlib import Path
import jupytext
# to produce parameter grid
from sklearn.model_selection import ParameterGrid
# to create SQLite database
from sklearn_evaluation import NotebookDatabase
Code#
NotebookDatabase
indexes the output of tagged cells. In this example, we're using Python scripts (and tag cells using # %% tags=["some-tag"]
). We convert these scripts to notebooks using jupytext
, but the same concept applies for user-created notebooks (.ipynb
)— see here to learn how to tag cells in .ipynb
files.
# data loading script
data = """
# %%
from sklearn import datasets
# %%
ca_housing = datasets.fetch_california_housing(as_frame=True)
df = ca_housing['frame']
df.to_csv('data.csv', index=False)
"""
# model fitting script
model = """
# %% tags=["parameters"]
model = None
params = None
# %%
import importlib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# %%
df = pd.read_csv('data.csv')
# %%
X = df.drop('MedHouseVal', axis='columns')
y = df.MedHouseVal
# %%
X_train, X_test, y_train, y_test = train_test_split(X,
y,
test_size=0.33,
random_state=0)
# %% tags=["model"]
mod, _, attr = model.rpartition('.')
reg = getattr(importlib.import_module(mod), attr)(**params)
reg.fit(X_train, y_train)
print(model)
# %% tags=["params"]
print(reg.get_params())
# %% tags=["mse"]
y_pred = reg.predict(X_test)
mean_squared_error(y_test, y_pred)
"""
data_nb = jupytext.reads(data, fmt="py:percent")
model_nb = jupytext.reads(model, fmt="py:percent")
jupytext.write(data_nb, "data.ipynb")
jupytext.write(model_nb, "model.ipynb")
Executing notebooks#
Using the execute_notebook
function from ploomber-engine, each experiment will create an output .ipynb
file.
from ploomber_engine import execute_notebook
experiments = {
"sklearn.tree.DecisionTreeRegressor": ParameterGrid(
dict(
criterion=["squared_error", "friedman_mse"],
splitter=["best", "random"],
max_depth=[3, 5],
)
),
"sklearn.linear_model.Lasso": ParameterGrid(
dict(alpha=[1.0, 2.0, 3.0], fit_intercept=[True, False])
),
"sklearn.linear_model.Ridge": ParameterGrid(
dict(alpha=[1.0, 2.0, 3.0], fit_intercept=[True, False])
),
"sklearn.linear_model.ElasticNet": ParameterGrid(
dict(alpha=[1.0, 2.0, 3.0], fit_intercept=[True, False])
),
}
# executes data.ipynb, creates output.ipynb and data.csv
execute_notebook(Path("data.ipynb"), "output.ipynb")
p = Path("output/models")
p.mkdir(parents=True, exist_ok=True)
# generate one task per set of parameters
for model, grid in experiments.items():
for i, params in enumerate(grid):
name = f"{model}-{i}"
task = execute_notebook(
Path("model.ipynb"),
Path(f"output/models/{name}.ipynb"),
parameters=dict(model=model, params=params),
)
Show code cell output
0%| | 0/3 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/3 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/3 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/3 [00:00<?, ?it/s]
Executing cell: 3: 100%|ββββββββββββββββββββββββββ| 3/3 [00:00<00:00, 13.98it/s]
Executing cell: 3: 100%|ββββββββββββββββββββββββββ| 3/3 [00:00<00:00, 13.92it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 110.35it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 181.67it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 101.54it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 186.43it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 124.85it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 190.23it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 100.08it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 189.77it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 177.63it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 165.11it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 189.05it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 158.37it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 172.09it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 167.73it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 197.17it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 183.36it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 172.32it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 172.96it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 196.09it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 194.22it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 194.68it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 172.56it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 170.20it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 160.42it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 179.10it/s]
0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 1: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 2: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 3: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 4: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 5: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 6: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 7: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 8: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 9: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 0%| | 0/10 [00:00<?, ?it/s]
Executing cell: 10: 100%|ββββββββββββββββββββββ| 10/10 [00:00<00:00, 153.65it/s]
Indexing notebooks#
# initialize db with notebooks in the outputs directory
db = NotebookDatabase("nb.db", "output/models/*.ipynb")
# Note: pass update=True if you want to update the database if
# the output notebook changes
db.index(verbose=True, update=False);
Show code cell output
Indexing output/models/sklearn.linear_model.Lasso-1.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-5.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-3.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-6.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-5.ipynb
Indexing output/models/sklearn.linear_model.Ridge-0.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-2.ipynb
Indexing output/models/sklearn.linear_model.Lasso-2.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-1.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-3.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-4.ipynb
Indexing output/models/sklearn.linear_model.Lasso-0.ipynb
Indexing output/models/sklearn.linear_model.Ridge-1.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-4.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-1.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-7.ipynb
Indexing output/models/sklearn.linear_model.Lasso-5.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-2.ipynb
Indexing output/models/sklearn.linear_model.Ridge-5.ipynb
Indexing output/models/sklearn.linear_model.Lasso-4.ipynb
Indexing output/models/sklearn.linear_model.Lasso-3.ipynb
Indexing output/models/sklearn.linear_model.Ridge-4.ipynb
Indexing output/models/sklearn.linear_model.Ridge-3.ipynb
Indexing output/models/sklearn.linear_model.ElasticNet-0.ipynb
Indexing output/models/sklearn.tree.DecisionTreeRegressor-0.ipynb
Indexing output/models/sklearn.linear_model.Ridge-2.ipynb
Note: the update
argument in index()
was added in sklearn-evaluation version 0.7
Querying notebooks#
NotebookDatabase
uses SQLite. Here we use JupySQL to query our experiments.
# load jupysql magic
%load_ext sql
Best performing models#
%%sql sqlite:///nb.db
SELECT
path,
json_extract(c, '$.model') AS model,
json_extract(c, '$.mse') AS mse
FROM nbs
ORDER BY 3 ASC
LIMIT 3
Done.
path | model | mse |
---|---|---|
output/models/sklearn.tree.DecisionTreeRegressor-2.ipynb | sklearn.tree.DecisionTreeRegressor | 0.5371201844717582 |
output/models/sklearn.linear_model.Ridge-0.ipynb | sklearn.linear_model.Ridge | 0.5373777886259665 |
output/models/sklearn.linear_model.Ridge-2.ipynb | sklearn.linear_model.Ridge | 0.5373794632905633 |
Note: If using SQLite 3.38.0 (which ships with Python >=3.10) or higher, you can use the shorter ->>
operator:
SELECT
path,
c ->> '$.model' AS model,
c ->> '$.mse' AS mse
FROM nbs
ORDER BY 3 ASC
LIMIT 3
See SQLite's documentation for details.
Average error by model type#
%%sql
SELECT
json_extract(c, '$.model') AS model,
AVG(json_extract(c, '$.mse')) AS avg_mse
FROM nbs
GROUP BY 1
ORDER BY 2 ASC
* sqlite:///nb.db
Done.
model | avg_mse |
---|---|
sklearn.linear_model.Ridge | 0.5841920916573025 |
sklearn.tree.DecisionTreeRegressor | 0.7619947292762443 |
sklearn.linear_model.ElasticNet | 1.0253005883728399 |
sklearn.linear_model.Lasso | 1.1976653911089405 |
DecisionTree by performance#
%%sql
SELECT
json_extract(c, '$.model') AS model,
json_extract(c, '$.mse') AS mse,
json_extract(c, '$.params.max_depth') AS max_depth,
json_extract(c, '$.params.criterion') AS criterion,
json_extract(c, '$.params.splitter') AS splitter
FROM nbs
WHERE json_extract(c, '$.model') = 'sklearn.tree.DecisionTreeRegressor'
ORDER BY mse ASC
LIMIT 5
* sqlite:///nb.db
Done.
model | mse | max_depth | criterion | splitter |
---|---|---|---|---|
sklearn.tree.DecisionTreeRegressor | 0.5371201844717582 | 5 | squared_error | best |
sklearn.tree.DecisionTreeRegressor | 0.5392077393143363 | 5 | friedman_mse | best |
sklearn.tree.DecisionTreeRegressor | 0.6624525754702242 | 3 | friedman_mse | best |
sklearn.tree.DecisionTreeRegressor | 0.6624525754702245 | 3 | squared_error | best |
sklearn.tree.DecisionTreeRegressor | 0.7799601737977705 | 3 | friedman_mse | random |