SPORF
SPORF copied to clipboard
Setting oob_score=True changes test accuracy
When `oob_score=True`, the classification accuracy on the test set doesn't match the accuracy obtained with `oob_score=False`, and it also varies between runs even with `random_state` set.
# Reproduction script: fit two rerfClassifier models that differ only in
# `oob_score` and compare their accuracy on the same held-out test set.
import pandas as pd
from rerf.rerfClassifier import rerfClassifier

# Import scikit-learn dataset library
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load dataset
iris = datasets.load_iris()

# Build the DataFrame that the feature/label selection below indexes into.
# Column names are the ones this script already uses; the four feature
# columns follow the column order of `iris.data`.
data = pd.DataFrame(
    {
        "sepal length": iris.data[:, 0],
        "sepal width": iris.data[:, 1],
        "petal length": iris.data[:, 2],
        "petal width": iris.data[:, 3],
        "species": iris.target,
    }
)

X = data[["sepal length", "sepal width", "petal length", "petal width"]]  # Features
y = data["species"]  # Labels

# Split dataset into training set and test set
# NOTE(review): no `random_state` is passed here, so the train/test partition
# is redrawn on every run. That alone makes the reported accuracies vary
# between runs, independently of the classifiers' own `random_state`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3
)  # 70% training and 30% test

# Same hyper-parameters and seed for both models; only `oob_score` differs.
clf1 = rerfClassifier(n_estimators=10, oob_score=False, random_state=2)
clf2 = rerfClassifier(n_estimators=10, oob_score=True, random_state=2)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
rerfClassifier(feature_combinations=1.5, image_height=None, image_width=None,
max_depth=None, max_features='auto', min_samples_split=1,
n_estimators=10, n_jobs=None, oob_score=True,
patch_height_max=None, patch_height_min=1, patch_width_max=None,
patch_width_min=1, projection_matrix='RerF', random_state=2)
from sklearn import metrics

# Predict the held-out test set with each fitted classifier
# (clf1: oob_score=False, clf2: oob_score=True).
y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)

# Print test-set accuracy for both models, in the same order and with the
# same labels as before, so the two numbers can be compared directly.
for label, predictions in (("Accuracy:", y_pred1), ("Accuracy oob:", y_pred2)):
    print(label, metrics.accuracy_score(y_test, predictions))
Accuracy: 0.9555555555555556
Accuracy oob: 0.9333333333333333