# Case 2 (EDA): model training and evaluation
# Split features/target out of the scaled + oversampled training frame.
from sklearn.model_selection import train_test_split
# NOTE(review): assumes train_ss_ov is a pandas DataFrame built earlier in the
# pipeline (not visible in this chunk) with a binary 'Churn' target column.
X = train_ss_ov.drop(columns=['Churn'])
y = train_ss_ov['Churn']
# 70/30 hold-out split; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=70)
# Helper functions
def report(y_test, pred):
    """Print accuracy, precision, recall, F1 and specificity for binary labels.

    Parameters
    ----------
    y_test : array-like of 0/1 ground-truth labels (supports vectorized ==/&).
    pred   : array-like of 0/1 predicted labels.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    print("Accuracy:", accuracy_score(y_test, pred))
    print("Precision:", precision_score(y_test, pred))
    print("Recall:", recall_score(y_test, pred))
    print("F1 Score:", f1_score(y_test, pred))
    # Specificity = TN / (TN + FP). Guard the denominator: if y_test contains
    # no actual negatives the original raised ZeroDivisionError.
    tn = ((y_test == 0) & (pred == 0)).sum()
    fp = ((y_test == 0) & (pred == 1)).sum()
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    print("Specificity:", specificity)
def df_add(model_name, y_test, pred):
    """Append one model's evaluation metrics as a row of the global results_df.

    Bug fix: the original created ``results_df`` as a fresh LOCAL DataFrame on
    every call, so rows never accumulated and the module-level ``results_df``
    display further down could not see any data. The table is now a global,
    created lazily on first use.

    Parameters
    ----------
    model_name : str label stored in the 'Model' column.
    y_test, pred : array-like 0/1 labels (same contract as ``report``).
    """
    global results_df
    if 'results_df' not in globals():
        results_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'Specificity'])
    # NOTE(review): relies on accuracy_score/precision_score/recall_score/
    # f1_score being imported at module level (the original did too).
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    # Specificity = TN / (TN + FP), guarded against a zero denominator.
    tn = ((y_test == 0) & (pred == 0)).sum()
    fp = ((y_test == 0) & (pred == 1)).sum()
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    results_df.loc[len(results_df)] = [model_name, accuracy, precision, recall, f1, specificity]
# DecisionTreeClassifier
# Baseline decision tree: shallow (max_depth=4) to limit overfitting,
# fixed seed for reproducibility.
dt_clf = DecisionTreeClassifier(random_state=70, max_depth=4)
dt_clf.fit(X_train, y_train)
pred = dt_clf.predict(X_test)
# GridSearchCV
# Tune the tree depth over 4..100 (step 4) with 5-fold CV on accuracy.
params = {
'max_depth': range(4, 101, 4)
}
grid_cv = GridSearchCV(dt_clf, param_grid=params, scoring='accuracy', cv=5, return_train_score=True)
grid_cv.fit(X_train, y_train)
# Best refitted tree, scored on the hold-out set.
best_model = grid_cv.best_estimator_
best_pred = best_model.predict(X_test)
# Model comparison
# Candidate classifiers for the comparison run, all seeded with
# random_state=70 so results are reproducible.
models = [
    ('RandomForestClassifier', RandomForestClassifier(random_state=70)),
    ('DecisionTreeClassifier', DecisionTreeClassifier(random_state=70)),
    ('AdaBoostClassifier', AdaBoostClassifier(random_state=70)),
    ('GradientBoostingClassifier', GradientBoostingClassifier(random_state=70)),
    ('LogisticRegression', LogisticRegression(random_state=70, max_iter=500, solver='liblinear')),
]
# Accumulators for the cross-validation scores and matching model names.
results = []
names = []
# Compare the candidates by 5-fold CV accuracy on the training split.
# The fold generator is deterministic (fixed seed), so it is hoisted out of
# the loop — the original rebuilt an identical KFold on every iteration.
kfold = KFold(n_splits=5, random_state=70, shuffle=True)
for name, model in models:
    cv_results = cross_val_score(model, X_train, y_train,
                                 cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print(name, cv_results.mean(), cv_results.std())
# Fit each candidate on the full training split and score it on the hold-out
# set. The original also ran cross_val_score here and discarded the result —
# that redundant (and expensive: 5 extra fits per model) call is removed.
for name, model in models:
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(name)
    report(y_test, pred)       # print the metric summary
    df_add(name, y_test, pred)  # record the row in results_df
# KNeighborsClassifier
# k-nearest-neighbours baseline with k=5.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, pred)
df_add('KNeighborsClassifier', y_test, pred)
print(accuracy_score(y_test, pred))
# Bare expression: in a notebook cell this displays the accumulated metrics
# table; it has no effect in a plain script.
results_df
# GradientBoostingClassifier
# Gradient boosting with default hyper-parameters. Time the fit so it can be
# compared with the tuned run below — the original captured start_time but
# never reported it.
start_time = time.time()
gb_clf = GradientBoostingClassifier(random_state=70)
gb_clf.fit(X_train, y_train)
gb_pred = gb_clf.predict(X_test)
print('Fit time : ', time.time() - start_time)
# GridSearchCV
# Small grid over ensemble size and learning rate; cv=2 keeps the search
# cheap, n_jobs=-1 uses every core, verbose=1 prints progress.
params = {
'n_estimators' : [100, 500],
'learning_rate' : [0.05, 0.1]
}
start_time = time.time()
grid = GridSearchCV(gb_clf, param_grid=params, cv=2, verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)
# Score the refitted best estimator on the hold-out set and record it.
df_add('GradientBoostingClassifier', y_test, grid.best_estimator_.predict(X_test))
print('Fit time : ', time.time() - start_time)
# XGBClassifier
# XGBoost without early stopping, timed.
start_time = time.time()
xgb = XGBClassifier(n_estimators=400, learning_rate=0.1, max_depth=3)
xgb.fit(X_train, y_train)
print('Fit time : ', time.time() - start_time)
# NOTE(review): early stopping is monitored on the TRAINING data itself here,
# so it will rarely trigger; a held-out validation set is the usual choice.
evals = [(X_train, y_train)]
start_time = time.time()
xgb = XGBClassifier(n_estimators=400, learning_rate=0.1, max_depth=3)
# NOTE(review): `early_stopping_rounds` as a fit() kwarg was removed in
# xgboost >= 2.0 (it moved to the constructor) — confirm installed version.
xgb.fit(X_train, y_train, early_stopping_rounds=10, eval_set=evals)
df_add('XGBoostClassifier', y_test, xgb.predict(X_test))
# LGBMClassifier
# LightGBM with early stopping.
# NOTE(review): early stopping is monitored on the training set itself, so it
# rarely triggers; and lightgbm >= 4.0 removed `early_stopping_rounds` from
# fit() in favour of callbacks=[early_stopping(100)] — confirm the version.
evals = [(X_train, y_train)]
start_time = time.time()
lgbm = LGBMClassifier(n_estimators=400)
lgbm.fit(X_train, y_train, early_stopping_rounds=100, eval_set=evals)
# Bug fix: the original called report() on grid.best_estimator_ — the
# gradient-boosting grid from the PREVIOUS section — while recording the LGBM
# predictions in results_df. Report the same LGBM predictions that are stored.
lgbm_pred = lgbm.predict(X_test)
report(y_test, lgbm_pred)
df_add('LGBMClassifier', y_test, lgbm_pred)
