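# NOTE: the line below is a PHP warning emitted by the DokuWiki server that hosted this page; it is not part of the script.
# Warning: Declaration of action_plugin_tablewidth::register(&$controller) should be compatible with DokuWiki_Action_Plugin::register(Doku_Event_Handler $controller) in /s/bach/b/class/cs545/public_html/fall16/lib/plugins/tablewidth/action.php on line 93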
"""
ensemble methods in scikit-learn
http://scikit-learn.org/stable/modules/ensemble.html
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from sklearn import svm
from sklearn.datasets import load_digits
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    RandomForestClassifier,
)
# cross_val_predict, StratifiedKFold and GridSearchCV live in
# sklearn.model_selection; the older sklearn.cross_validation and
# sklearn.grid_search modules were removed in scikit-learn 0.20.
from sklearn.model_selection import (
    GridSearchCV,
    StratifiedKFold,
    cross_val_predict,
)
from sklearn.tree import DecisionTreeClassifier

# Load the digits data and corrupt it with noise: every entry is selected
# with probability 0.5 and, if selected, shifted by a uniform value drawn
# from [1, 15).  (Presumably this is meant to make the task harder.)
digits = load_digits()
X = digits.data
X = X + np.random.binomial(1, 0.5, X.shape) * np.random.uniform(1, 15, X.shape)
y = digits.target
# rescale the features so that the largest value is 1
X /= X.max()

# The same 5-fold stratified splitter is reused for every model below so
# that all accuracies are measured with the same cross-validation setup.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# let's look at accuracy as a function of the number of trees:
n_estimators = [10, 20, 50, 100, 200, 500]
accuracy = []
for estimators in n_estimators:
    print("num estimators: ", estimators)
    classifier = RandomForestClassifier(n_estimators=estimators)
    y_predict = cross_val_predict(classifier, X, y, cv=cv)
    accuracy.append(metrics.accuracy_score(y, y_predict))

# the forest sizes span 10..500, hence the logarithmic x axis
plt.semilogx(n_estimators, accuracy, 'ob')
plt.title('performance of random forests on the digits data')
plt.xlabel('number of estimators')
plt.ylabel('accuracy')
plt.show()

# let's compare to decision trees:
classifier = DecisionTreeClassifier(max_depth=None)
y_predict = cross_val_predict(classifier, X, y, cv=cv)
print(metrics.accuracy_score(y, y_predict))

# bagging
# The base tree is passed positionally (as in the AdaBoost call below)
# because the keyword for it was renamed across scikit-learn releases.
model = BaggingClassifier(DecisionTreeClassifier(), n_estimators=200, random_state=0)
# evaluate the bagging ensemble itself, not the single tree defined above
y_predict = cross_val_predict(model, X, y, cv=cv)
print(metrics.accuracy_score(y, y_predict))

# AdaBoost
classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=500)
y_predict = cross_val_predict(classifier, X, y, cv=cv)
print(metrics.accuracy_score(y, y_predict))

# SVM
# GridSearchCV is itself the estimator handed to cross_val_predict, so the
# hyper-parameter search is repeated inside each outer training fold.
param_grid = [
    {'C': [1, 10, 100], 'kernel': ['linear']},
    {'C': [1, 10, 100], 'gamma': [1, 0.1, 0.01], 'kernel': ['rbf']},
]
classifier = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid)
y_predict = cross_val_predict(classifier, X, y, cv=cv)
print(metrics.accuracy_score(y, y_predict))