내용 | 상태 | 기타 |
---|---|---|
손글씨분류하기 | 제출완료 | 깃허브 |
# Import the required modules
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Load the digits dataset and pull out the feature matrix and the labels.
digits = load_digits()
digits_data, digits_label = digits.data, digits.target

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    digits_data,
    digits_label,
    test_size=0.2,
    random_state=13,
)
from sklearn.tree import DecisionTreeClassifier

# Build and train a seeded decision tree (fit() returns the estimator).
decision_tree = DecisionTreeClassifier(random_state=13).fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# Per-class precision/recall/F1 report, then the confusion matrix.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

# Check the overall accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy
precision recall f1-score support
0 0.97 0.93 0.95 30
1 0.83 0.78 0.81 37
2 0.87 0.87 0.87 38
3 0.79 0.84 0.82 37
4 0.89 0.87 0.88 38
5 0.89 0.93 0.91 45
6 0.95 0.93 0.94 42
7 0.96 0.92 0.94 26
8 0.93 0.79 0.86 34
9 0.72 0.88 0.79 33
accuracy 0.88 360
macro avg 0.88 0.87 0.88 360
weighted avg 0.88 0.88 0.88 360
[[28 0 1 0 0 0 0 0 0 1]
[ 0 29 2 2 2 0 0 0 0 2]
[ 0 1 33 1 1 0 1 0 1 0]
[ 0 1 0 31 0 1 0 0 1 3]
[ 0 0 1 1 33 1 0 1 0 1]
[ 0 0 0 0 1 42 0 0 0 2]
[ 0 0 0 2 0 1 39 0 0 0]
[ 0 2 0 0 0 0 0 24 0 0]
[ 0 2 0 1 0 1 1 0 27 2]
[ 1 0 1 1 0 1 0 0 0 29]]
0.875
from sklearn.ensemble import RandomForestClassifier

# Build and train a seeded random forest (fit() returns the estimator).
random_forest = RandomForestClassifier(random_state=32).fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

# Per-class precision/recall/F1 report, then the confusion matrix.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

# Check the overall accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy
precision recall f1-score support
0 1.00 1.00 1.00 30
1 0.90 1.00 0.95 37
2 0.97 1.00 0.99 38
3 1.00 0.95 0.97 37
4 0.97 1.00 0.99 38
5 1.00 0.98 0.99 45
6 1.00 0.98 0.99 42
7 0.96 0.96 0.96 26
8 0.93 0.79 0.86 34
9 0.89 0.97 0.93 33
accuracy 0.96 360
macro avg 0.96 0.96 0.96 360
weighted avg 0.97 0.96 0.96 360
[[30 0 0 0 0 0 0 0 0 0]
[ 0 37 0 0 0 0 0 0 0 0]
[ 0 0 38 0 0 0 0 0 0 0]
[ 0 0 0 35 0 0 0 0 1 1]
[ 0 0 0 0 38 0 0 0 0 0]
[ 0 0 0 0 0 44 0 0 0 1]
[ 0 1 0 0 0 0 41 0 0 0]
[ 0 0 0 0 0 0 0 25 0 1]
[ 0 3 1 0 1 0 0 1 27 1]
[ 0 0 0 0 0 0 0 0 1 32]]
0.9638888888888889
from sklearn import svm

# Train a support vector classifier with its default settings.
svm_model = svm.SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Per-class precision/recall/F1 report, then the confusion matrix.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

# Check the overall accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy
precision recall f1-score support
0 1.00 1.00 1.00 30
1 0.93 1.00 0.96 37
2 0.97 1.00 0.99 38
3 1.00 0.97 0.99 37
4 1.00 1.00 1.00 38
5 1.00 0.98 0.99 45
6 1.00 1.00 1.00 42
7 1.00 0.96 0.98 26
8 1.00 0.91 0.95 34
9 0.94 1.00 0.97 33
accuracy 0.98 360
macro avg 0.98 0.98 0.98 360
weighted avg 0.98 0.98 0.98 360
[[30 0 0 0 0 0 0 0 0 0]
[ 0 37 0 0 0 0 0 0 0 0]
[ 0 0 38 0 0 0 0 0 0 0]
[ 0 0 1 36 0 0 0 0 0 0]
[ 0 0 0 0 38 0 0 0 0 0]
[ 0 0 0 0 0 44 0 0 0 1]
[ 0 0 0 0 0 0 42 0 0 0]
[ 0 0 0 0 0 0 0 25 0 1]
[ 0 3 0 0 0 0 0 0 31 0]
[ 0 0 0 0 0 0 0 0 0 33]]
0.9833333333333333
from sklearn.linear_model import SGDClassifier

# SGD shuffles the training data between epochs, so an unseeded model gives
# different predictions (and different scores) on every run.  Seed it, as the
# tree/forest models and the train/test split in this file already are, so the
# reported metrics are reproducible.
sgd_model = SGDClassifier(random_state=32)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

# Per-class precision/recall/F1 report, then the confusion matrix.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Check the overall accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy
precision recall f1-score support
0 1.00 1.00 1.00 30
1 0.80 0.97 0.88 37
2 1.00 0.95 0.97 38
3 1.00 0.92 0.96 37
4 1.00 1.00 1.00 38
5 1.00 0.91 0.95 45
6 0.98 1.00 0.99 42
7 0.93 0.96 0.94 26
8 0.78 0.85 0.82 34
9 1.00 0.88 0.94 33
accuracy 0.94 360
macro avg 0.95 0.94 0.94 360
weighted avg 0.95 0.94 0.95 360
[[30 0 0 0 0 0 0 0 0 0]
[ 0 36 0 0 0 0 0 0 1 0]
[ 0 2 36 0 0 0 0 0 0 0]
[ 0 0 0 34 0 0 0 1 2 0]
[ 0 0 0 0 38 0 0 0 0 0]
[ 0 3 0 0 0 41 1 0 0 0]
[ 0 0 0 0 0 0 42 0 0 0]
[ 0 0 0 0 0 0 0 25 1 0]
[ 0 4 0 0 0 0 0 1 29 0]
[ 0 0 0 0 0 0 0 0 4 29]]
0.9444444444444444
from sklearn.linear_model import LogisticRegression

# With the default cap of 100 iterations the lbfgs solver stops before it has
# converged on these unscaled features (sklearn reports a ConvergenceWarning),
# so the evaluated model is only partially optimised.  Raise the cap to give
# the solver room to finish, as the warning itself recommends.
logistic_model = LogisticRegression(max_iter=10000)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

# Per-class precision/recall/F1 report.
print(classification_report(y_test, y_pred))

# Banner, then the confusion matrix separating correct from wrong predictions.
print("######์ ๋ต๊ณผ ์ค๋ต์ ๊ตฌ๋ถํด์ ํ์ธ#######")
print(confusion_matrix(y_test, y_pred))

# Check the overall accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
accuracy
precision recall f1-score support
0 1.00 1.00 1.00 30
1 0.85 0.92 0.88 37
2 1.00 0.95 0.97 38
3 0.97 0.92 0.94 37
4 0.95 1.00 0.97 38
5 0.98 0.96 0.97 45
6 1.00 0.98 0.99 42
7 0.96 0.92 0.94 26
8 0.97 0.82 0.89 34
9 0.82 1.00 0.90 33
accuracy 0.95 360
macro avg 0.95 0.95 0.95 360
weighted avg 0.95 0.95 0.95 360
######์ ๋ต๊ณผ ์ค๋ต์ ๊ตฌ๋ถํด์ ํ์ธ#######
[[30 0 0 0 0 0 0 0 0 0]
[ 0 34 0 0 1 0 0 1 0 1]
[ 0 2 36 0 0 0 0 0 0 0]
[ 0 0 0 34 0 0 0 0 1 2]
[ 0 0 0 0 38 0 0 0 0 0]
[ 0 0 0 1 0 43 0 0 0 1]
[ 0 1 0 0 0 0 41 0 0 0]
[ 0 0 0 0 0 1 0 24 0 1]
[ 0 3 0 0 1 0 0 0 28 2]
[ 0 0 0 0 0 0 0 0 0 33]]
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
0.9472222222222222
Recall : ์ค์ ๋ก True์ธ ๋ฐ์ดํฐ๋ฅผ ๋ชจ๋ธ์ด True ๋ผ๊ณ ์ธ์ํ ๋ฐ์ดํฐ์
Precision : ๋ชจ๋ธ์ด True๋ก ์์ธกํ ๋ฐ์ดํฐ ์ค ์ค์ ๋ก True์ธ ๋ฐ์ดํฐ์
F1 Score : Recall ๊ฐ๊ณผ Precision์ ๊ฐ๊ณผ ์ฐจ์ด๊ฐ ์ ์ ์๋ก ๋
ธ์ด์ฆ๊ฐ ์ ์ ๊ฒ.
SVM Model
다른 모델에 비해 가장 높은 수치가 나왔으며 수치상 가장 뛰어난 성능을 보이는 것으로 판단됩니다.
정확도만 가지고 판단할 경우 SVM 모델 뿐만 아니라 Random Forest 역시 비슷한 정확도를 보여주었으나 정확도 만으로 판단하는 것은 불균형한 데이터로 인해 오류가 발생할 수 있기 때문에 Precision, Recall, F1-score 값을 모두 고려하였습니다.
이상의 내용에 따라 SVM 모델을 채택하였습니다.
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Load the wine dataset and pull out the feature matrix and the labels.
wine = load_wine()
wine_data, wine_label = wine.data, wine.target

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    wine_data,
    wine_label,
    test_size=0.2,
    random_state=7,
)
# DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Build and train a seeded decision tree (fit() returns the estimator).
decision_tree = DecisionTreeClassifier(random_state=32).fit(X_train, y_train)

# Predict on the held-out test data.
y_pred = decision_tree.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 1.00 1.00 7
1 0.89 1.00 0.94 17
2 1.00 0.83 0.91 12
accuracy 0.94 36
macro avg 0.96 0.94 0.95 36
weighted avg 0.95 0.94 0.94 36
array([[ 7, 0, 0],
[ 0, 17, 0],
[ 0, 2, 10]])
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Build and train a seeded random forest (fit() returns the estimator).
random_forest = RandomForestClassifier(random_state=32).fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 1.00 1.00 7
1 1.00 1.00 1.00 17
2 1.00 1.00 1.00 12
accuracy 1.00 36
macro avg 1.00 1.00 1.00 36
weighted avg 1.00 1.00 1.00 36
array([[ 7, 0, 0],
[ 0, 17, 0],
[ 0, 0, 12]])
# Support Vector Machine (SVM)
from sklearn import svm

# Train a support vector classifier with its default settings.
svm_model = svm.SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 0.86 0.86 0.86 7
1 0.58 0.88 0.70 17
2 0.33 0.08 0.13 12
accuracy 0.61 36
macro avg 0.59 0.61 0.56 36
weighted avg 0.55 0.61 0.54 36
array([[ 6, 0, 1],
[ 1, 15, 1],
[ 0, 11, 1]])
# SGD Classifier
from sklearn.linear_model import SGDClassifier

# SGD shuffles the training data between epochs, so an unseeded model gives
# different predictions (and different scores) on every run.  Seed it, as the
# tree/forest models and the train/test split in this file already are, so the
# reported metrics are reproducible.
sgd_model = SGDClassifier(random_state=32)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

# When a class is never predicted its precision/F-score is undefined; sklearn
# then reports 0.0 and emits UndefinedMetricWarning.  zero_division=0 keeps
# that same 0.0 value but states the choice explicitly and silences the
# repeated warnings.
print(classification_report(y_test, y_pred, zero_division=0))

# The trailing expression shows the confusion matrix as the cell result.
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 0.78 1.00 0.88 7
1 0.59 0.94 0.73 17
2 0.00 0.00 0.00 12
accuracy 0.64 36
macro avg 0.46 0.65 0.53 36
weighted avg 0.43 0.64 0.51 36
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
array([[ 7, 0, 0],
[ 1, 16, 0],
[ 1, 11, 0]])
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# With the default cap of 100 iterations the lbfgs solver stops before it has
# converged on these unscaled features (sklearn reports a ConvergenceWarning),
# so the evaluated model is only partially optimised.  Raise the cap to give
# the solver room to finish, as the warning itself recommends.
logistic_model = LogisticRegression(max_iter=10000)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_test, y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 0.86 0.92 7
1 0.94 1.00 0.97 17
2 1.00 1.00 1.00 12
accuracy 0.97 36
macro avg 0.98 0.95 0.96 36
weighted avg 0.97 0.97 0.97 36
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
array([[ 6, 1, 0],
[ 0, 17, 0],
[ 0, 0, 12]])
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Load the breast-cancer dataset and pull out the feature matrix and labels.
breast_cancer = load_breast_cancer()
breast_cancer_data, breast_cancer_label = breast_cancer.data, breast_cancer.target

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    breast_cancer_data,
    breast_cancer_label,
    test_size=0.2,
    random_state=7,
)
# DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Build and train a seeded decision tree (fit() returns the estimator).
decision_tree = DecisionTreeClassifier(random_state=32).fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 0.92 0.82 0.87 40
1 0.91 0.96 0.93 74
accuracy 0.91 114
macro avg 0.91 0.89 0.90 114
weighted avg 0.91 0.91 0.91 114
array([[33, 7],
[ 3, 71]])
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Build and train a seeded random forest (fit() returns the estimator).
random_forest = RandomForestClassifier(random_state=32).fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 1.00 1.00 40
1 1.00 1.00 1.00 74
accuracy 1.00 114
macro avg 1.00 1.00 1.00 114
weighted avg 1.00 1.00 1.00 114
array([[40, 0],
[ 0, 74]])
# Support Vector Machine (SVM)
from sklearn import svm

# Train a support vector classifier with its default settings.
svm_model = svm.SVC().fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_true=y_test, y_pred=y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 0.72 0.84 40
1 0.87 1.00 0.93 74
accuracy 0.90 114
macro avg 0.94 0.86 0.89 114
weighted avg 0.92 0.90 0.90 114
array([[29, 11],
[ 0, 74]])
# SGD Classifier
from sklearn.linear_model import SGDClassifier

# SGD shuffles the training data between epochs, so an unseeded model gives
# different predictions (and different scores) on every run.  Seed it, as the
# tree/forest models and the train/test split in this file already are, so the
# reported metrics are reproducible.
sgd_model = SGDClassifier(random_state=32)
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_test, y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 0.85 0.82 0.84 40
1 0.91 0.92 0.91 74
accuracy 0.89 114
macro avg 0.88 0.87 0.87 114
weighted avg 0.89 0.89 0.89 114
array([[33, 7],
[ 6, 68]])
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# With the default cap of 100 iterations the lbfgs solver stops before it has
# converged on these unscaled features (sklearn reports a ConvergenceWarning),
# so the evaluated model is only partially optimised.  Raise the cap to give
# the solver room to finish, as the warning itself recommends.
logistic_model = LogisticRegression(max_iter=10000)
logistic_model.fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)

# Print the per-class report; the trailing expression shows the confusion
# matrix as the cell result.
print(classification_report(y_test, y_pred))
confusion_matrix(y_test, y_pred)
precision recall f1-score support
0 1.00 0.82 0.90 40
1 0.91 1.00 0.95 74
accuracy 0.94 114
macro avg 0.96 0.91 0.93 114
weighted avg 0.94 0.94 0.94 114
/Users/baekgun/opt/anaconda3/envs/dev/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
array([[33, 7],
[ 0, 74]])