sklearn_evaluation.grid#

RandomForestClassifierGrid#

class sklearn_evaluation.grid.RandomForestClassifierGrid(grid, cv=3, verbose=0)#
confusion_matrix()#

Plots a confusion matrix based on GridSearchCV.best_estimator_.

Returns:

ax – Axes containing the plot

Return type:

matplotlib Axes

Examples

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearn_evaluation import grid

# generate data
X, y = make_classification(
    n_samples=100, n_features=2, n_informative=2, n_redundant=0, random_state=0
)

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)
model = grid.RandomForestClassifierGrid(grid="tiny")
model.fit(X_train, y_train)
model.set_test_data(X_test, y_test)
model.confusion_matrix()
../_images/rf_grid_cm.png
feature_importances()#

Plots feature importances based on GridSearchCV.best_estimator_.

Returns:

ax – Axes containing the plot

Return type:

matplotlib Axes

Examples

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearn_evaluation import grid

# generate data
X, y = make_classification(
    n_samples=100, n_features=2, n_informative=2, n_redundant=0, random_state=0
)

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)
model = grid.RandomForestClassifierGrid(grid="tiny")
model.fit(X_train, y_train)
model.set_test_data(X_test, y_test)
model.feature_importances()
../_images/rf_grid_feature_importances.png
fit(X, y)#

Fit estimator.

Parameters:
  • X ({array-like, sparse matrix} of shape (n_samples, n_features)) – The input samples. Use dtype=np.float32 for maximum efficiency. Sparse matrices are also supported, use sparse csc_matrix for maximum efficiency.

  • y (array-like of shape (n_samples,)) – The target values (class labels) used to fit the classifier.

Returns:

self – Returns the instance itself.

Return type:

object

grid_search_results(change='n_estimators', kind='line')#

Plots grid search results based on GridSearchCV.best_estimator_.

Parameters:
  • change (str or iterable with len<=2) – Parameter to change

  • kind (['line', 'bar']) – This only applies when change is a single parameter. Changes the type of plot.

Returns:

ax – Axes containing the plot

Return type:

matplotlib Axes

roc()#

Plots an ROC based on GridSearchCV.best_estimator_.

Returns:

ax – Axes containing the plot

Return type:

matplotlib Axes

Examples

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearn_evaluation import grid

# generate data
X, y = make_classification(
    n_samples=100, n_features=2, n_informative=2, n_redundant=0, random_state=0
)

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)
model = grid.RandomForestClassifierGrid(grid="tiny")
model.fit(X_train, y_train)
model.set_test_data(X_test, y_test)
model.roc()
../_images/rf_grid_roc.png
set_test_data(X_test, y_test) → None#

Set the test data

Parameters:
  • X_test (array-like of shape (n_samples, n_features)) – Test data, where n_samples is the number of samples and n_features is the number of features.

  • y_test (array-like of shape (n_samples,)) – The target variable for supervised learning problems.