교재 : 파이썬 머신러닝 완벽 가이드, 위키북스
min_samples_split
min_samples_leaf
max_features
max_depth
max_leaf_nodes
위 하이퍼파라미터들은 결정 트리의 과적합을 방지하기 위해 적절한 값으로 설정(튜닝)해야 한다.
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris data set and hold out 20% of the samples as a test split;
# random_state pins the shuffle so the split is reproducible.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=11,
)

# Fit a decision tree with no hyperparameters set other than the seed, i.e.
# without any of the growth constraints (depth, sample counts, ...).
dt_clf = DecisionTreeClassifier(random_state=156)
dt_clf.fit(X_train, y_train)
from sklearn.tree import export_graphviz
import graphviz

# Dump the fitted tree to a Graphviz DOT file, labelling nodes with the iris
# class and feature names; filled=True asks for coloured nodes.
export_graphviz(
    dt_clf,
    out_file='iris.dot',
    class_names=iris.target_names,
    feature_names=iris.feature_names,
    filled=True,
)

# Read the DOT text back. export_graphviz writes the file as UTF-8, so open
# it with the same encoding rather than the platform default.
with open('iris.dot', encoding='utf-8') as f:
    dot_graph = f.read()

# Wrap the DOT text in a graphviz.Source object. The result is not bound to a
# name; presumably this was the last expression of a notebook cell, where it
# would be rendered inline.
graphviz.Source(dot_graph)
# Refit on the same training split, this time capping the tree at depth 2.
dt_clf = DecisionTreeClassifier(random_state=156, max_depth=2)
dt_clf.fit(X_train, y_train)

# Export the depth-limited tree to its own DOT file.
export_graphviz(
    dt_clf,
    out_file='iris1.dot',
    class_names=iris.target_names,
    feature_names=iris.feature_names,
    filled=True,
)

# export_graphviz writes UTF-8, so read the file back with that encoding
# rather than the platform default.
with open('iris1.dot', encoding='utf-8') as f:
    dot_graph = f.read()

# Build the graphviz.Source object from the DOT text (result is not stored;
# presumably displayed as a notebook cell output).
graphviz.Source(dot_graph)
# Refit on the same training split, requiring at least 4 samples in a node
# before it may be split.
dt_clf = DecisionTreeClassifier(random_state=156, min_samples_split=4)
dt_clf.fit(X_train, y_train)

# NOTE(review): this writes to 'iris1.dot', the same path used for the
# max_depth=2 tree, so that earlier file is overwritten. Kept as-is; confirm
# whether a separate file name was intended.
export_graphviz(
    dt_clf,
    out_file='iris1.dot',
    class_names=iris.target_names,
    feature_names=iris.feature_names,
    filled=True,
)

# export_graphviz writes UTF-8, so read the file back with that encoding
# rather than the platform default.
with open('iris1.dot', encoding='utf-8') as f:
    dot_graph = f.read()

# Build the graphviz.Source object from the DOT text (result is not stored;
# presumably displayed as a notebook cell output).
graphviz.Source(dot_graph)
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
# Generate a synthetic classification data set with 2 features, 3 classes and
# one cluster per class; random_state fixes the generated samples.
X, y = make_classification(
    n_features=2,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=0,
)

# First plot: the raw points, feature 0 against feature 1, without labels.
plt.scatter(X[:, 0], X[:, 1])

# Turn off the Unicode minus sign for axis labels before the coloured plot.
plt.rcParams['axes.unicode_minus'] = False

# Second plot: the same points coloured by class label, with black edges.
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', s=25)

# NOTE(review): `func01` is not imported in the visible part of this file;
# presumably a project-local helper module providing visualize_boundary.
# Confirm it is imported before this point.

# Tree fitted on the full synthetic data with no growth constraints.
dt_clf = DecisionTreeClassifier(random_state=156)
dt_clf.fit(X, y)
func01.visualize_boundary(dt_clf, X, y)

# Same data, but every leaf must hold at least 6 samples.
dt_clf = DecisionTreeClassifier(random_state=156, min_samples_leaf=6)
dt_clf.fit(X, y)
func01.visualize_boundary(dt_clf, X, y)
* https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones