import numpy as np # 1.18.5
import pandas as pd # 0.25.1
import gc
import time
import matplotlib.pyplot as plt # 3.2.2
import seaborn as sns # 0.10.1
import os
%matplotlib inline
# Widen the notebook display limits so wide frames print fully.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 200
def get_dataset():
    """Load the Home Credit CSV inputs.

    Returns:
        tuple: ``(apps, prev)`` where ``apps`` is application_train and
        application_test stacked vertically (test rows have no TARGET) and
        ``prev`` is the previous_application history table.
    """
    train_df = pd.read_csv('application_train.csv')
    test_df = pd.read_csv('application_test.csv')
    combined = pd.concat([train_df, test_df])
    history = pd.read_csv('previous_application.csv')
    return combined, history
# Load the raw tables once at module level; later cells mutate these frames.
apps, prev = get_dataset()
def get_apps_processed(apps):
    """Add engineered APPS_* features to the application frame (mutates ``apps``).

    Derives EXT_SOURCE aggregates plus ratio features built from credit,
    income, family size, age and employment columns.

    Args:
        apps: application DataFrame (train + test concatenated).

    Returns:
        The same DataFrame with the APPS_* feature columns added.
    """
    # EXT_SOURCE_X aggregates: row-wise mean/std of the three external scores.
    ext_cols = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']
    apps['APPS_EXT_SOURCE_MEAN'] = apps[ext_cols].mean(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps[ext_cols].std(axis=1)
    apps['APPS_EXT_SOURCE_STD'] = apps['APPS_EXT_SOURCE_STD'].fillna(apps['APPS_EXT_SOURCE_STD'].mean())

    # FIX: DAYS_EMPLOYED uses 365243 as a NaN sentinel (the same convention
    # this file already handles for the DAYS_* columns of previous_application).
    # Mask it so the employment-based ratios below are not computed against
    # the sentinel value.
    apps['DAYS_EMPLOYED'] = apps['DAYS_EMPLOYED'].replace(365243, np.nan)

    # Ratios relative to AMT_CREDIT.
    apps['APPS_ANNUITY_CREDIT_RATIO'] = apps['AMT_ANNUITY'] / apps['AMT_CREDIT']
    apps['APPS_GOODS_CREDIT_RATIO'] = apps['AMT_GOODS_PRICE'] / apps['AMT_CREDIT']

    # Ratios relative to AMT_INCOME_TOTAL.
    apps['APPS_ANNUITY_INCOME_RATIO'] = apps['AMT_ANNUITY'] / apps['AMT_INCOME_TOTAL']
    apps['APPS_CREDIT_INCOME_RATIO'] = apps['AMT_CREDIT'] / apps['AMT_INCOME_TOTAL']
    apps['APPS_GOODS_INCOME_RATIO'] = apps['AMT_GOODS_PRICE'] / apps['AMT_INCOME_TOTAL']
    apps['APPS_CNT_FAM_INCOME_RATIO'] = apps['AMT_INCOME_TOTAL'] / apps['CNT_FAM_MEMBERS']

    # Ratios built from DAYS_BIRTH / DAYS_EMPLOYED (both are negative day counts).
    apps['APPS_EMPLOYED_BIRTH_RATIO'] = apps['DAYS_EMPLOYED'] / apps['DAYS_BIRTH']
    apps['APPS_INCOME_EMPLOYED_RATIO'] = apps['AMT_INCOME_TOTAL'] / apps['DAYS_EMPLOYED']
    apps['APPS_INCOME_BIRTH_RATIO'] = apps['AMT_INCOME_TOTAL'] / apps['DAYS_BIRTH']
    apps['APPS_CAR_BIRTH_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_BIRTH']
    apps['APPS_CAR_EMPLOYED_RATIO'] = apps['OWN_CAR_AGE'] / apps['DAYS_EMPLOYED']
    return apps
from sklearn.model_selection import train_test_split # 0.23.1
from lightgbm import LGBMClassifier # 3.1.1
def get_prev_processed(prev):
    """Add engineered PREV_* features to previous_application (mutates ``prev``).

    Returns:
        The same DataFrame with difference/ratio features, sentinel-cleaned
        DAYS_* columns, a due-date gap and a crude interest-rate estimate.
    """
    # Differences and ratios between the amount applied for and what was
    # actually granted / the goods price.
    prev['PREV_CREDIT_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_CREDIT']
    prev['PREV_GOODS_DIFF'] = prev['AMT_APPLICATION'] - prev['AMT_GOODS_PRICE']
    prev['PREV_CREDIT_APPL_RATIO'] = prev['AMT_CREDIT'] / prev['AMT_APPLICATION']
    prev['PREV_GOODS_APPL_RATIO'] = prev['AMT_GOODS_PRICE'] / prev['AMT_APPLICATION']

    # 365243 is a NaN sentinel in the DAYS_* columns — mask it.
    day_columns = ['DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE',
                   'DAYS_LAST_DUE_1ST_VERSION', 'DAYS_LAST_DUE',
                   'DAYS_TERMINATION']
    for column in day_columns:
        prev[column] = prev[column].replace(365243, np.nan)

    # Gap between the originally scheduled last due date and the actual one.
    prev['PREV_DAYS_LAST_DUE_DIFF'] = prev['DAYS_LAST_DUE_1ST_VERSION'] - prev['DAYS_LAST_DUE']

    # Total repaid = monthly annuity * number of payments; derive a crude
    # per-installment interest rate from the overpayment versus AMT_CREDIT.
    total_payment = prev['AMT_ANNUITY'] * prev['CNT_PAYMENT']
    prev['PREV_INTERESTS_RATE'] = (total_payment / prev['AMT_CREDIT'] - 1) / prev['CNT_PAYMENT']
    return prev
def get_prev_amt_agg(prev):
    """Aggregate previous_application amounts per customer.

    Aggregates both the raw amount columns and the engineered PREV_* columns
    over SK_ID_CURR.

    Returns:
        DataFrame indexed by SK_ID_CURR with flattened 'PREV_<COL>_<STAT>'
        column names.
    """
    agg_spec = {
        # raw columns
        'SK_ID_CURR': ['count'],
        'AMT_CREDIT': ['mean', 'max', 'sum'],
        'AMT_ANNUITY': ['mean', 'max', 'sum'],
        'AMT_APPLICATION': ['mean', 'max', 'sum'],
        'AMT_DOWN_PAYMENT': ['mean', 'max', 'sum'],
        'AMT_GOODS_PRICE': ['mean', 'max', 'sum'],
        'RATE_DOWN_PAYMENT': ['min', 'max', 'mean'],
        'DAYS_DECISION': ['min', 'max', 'mean'],
        'CNT_PAYMENT': ['mean', 'sum'],
        # engineered columns
        'PREV_CREDIT_DIFF': ['mean', 'max', 'sum'],
        'PREV_CREDIT_APPL_RATIO': ['mean', 'max'],
        'PREV_GOODS_DIFF': ['mean', 'max', 'sum'],
        'PREV_GOODS_APPL_RATIO': ['mean', 'max'],
        'PREV_DAYS_LAST_DUE_DIFF': ['mean', 'max', 'sum'],
        'PREV_INTERESTS_RATE': ['mean', 'max']
    }
    aggregated = prev.groupby('SK_ID_CURR').agg(agg_spec)
    # Flatten the (column, stat) MultiIndex into 'PREV_<COL>_<STAT>' names.
    aggregated.columns = [f"PREV_{col}_{stat}".upper() for col, stat in aggregated.columns]
    return aggregated
def get_prev_refused_appr_agg(prev):
    """Count Approved/Refused previous applications per SK_ID_CURR.

    Filters to the two statuses of interest, groups by customer and status,
    and pivots the status level to columns.

    Args:
        prev: previous_application DataFrame with SK_ID_CURR and
            NAME_CONTRACT_STATUS columns.

    Returns:
        DataFrame indexed by SK_ID_CURR with PREV_APPROVED_COUNT and
        PREV_REFUSED_COUNT columns; 0 where a status never occurs.
    """
    status_mask = prev['NAME_CONTRACT_STATUS'].isin(['Approved', 'Refused'])
    grouped = prev[status_mask].groupby(['SK_ID_CURR', 'NAME_CONTRACT_STATUS'])
    counts = grouped['SK_ID_CURR'].count().unstack()
    # FIX: pin the column order explicitly. The original hard-coded rename
    # assumed both statuses always appear after unstack(); if either were
    # entirely absent the 2-name assignment would raise a length mismatch,
    # and it silently relied on alphabetical ordering ('Approved' < 'Refused').
    counts = counts.reindex(columns=['Approved', 'Refused'])
    counts.columns = ['PREV_APPROVED_COUNT', 'PREV_REFUSED_COUNT']
    # Customers lacking one of the statuses get NaN from unstack/reindex.
    return counts.fillna(0)
def get_prev_agg(prev):
    """Run all previous_application processing and aggregation steps.

    Returns:
        DataFrame indexed by SK_ID_CURR combining amount aggregates with
        refused/approved ratios.
    """
    processed = get_prev_processed(prev)
    amt_agg = get_prev_amt_agg(processed)
    status_agg = get_prev_refused_appr_agg(processed)
    # Join the amount aggregates with the approved/refused counts.
    merged = amt_agg.merge(status_agg, on='SK_ID_CURR', how='left')
    # Share of this customer's past applications that were refused/approved.
    merged['PREV_REFUSED_RATIO'] = merged['PREV_REFUSED_COUNT'] / merged['PREV_SK_ID_CURR_COUNT']
    merged['PREV_APPROVED_RATIO'] = merged['PREV_APPROVED_COUNT'] / merged['PREV_SK_ID_CURR_COUNT']
    # The raw counts are redundant once the ratios exist.
    return merged.drop(['PREV_REFUSED_COUNT', 'PREV_APPROVED_COUNT'], axis=1)
def get_apps_all_with_prev_agg(apps, prev):
    """Merge engineered application features with prev-application aggregates."""
    app_features = get_apps_processed(apps)
    prev_features = get_prev_agg(prev)
    print('prev_agg shape:', prev_features.shape)
    print('apps_all before merge shape:', app_features.shape)
    # Left join keeps every application row even when there is no history.
    app_features = app_features.merge(prev_features, on='SK_ID_CURR', how='left')
    print('apps_all after merge with prev_agg shape:', app_features.shape)
    return app_features
def get_apps_all_encoded(apps_all):
    """Label-encode every object-dtype column in place using pd.factorize."""
    for col in apps_all.select_dtypes(include='object').columns:
        codes, _ = pd.factorize(apps_all[col])
        apps_all[col] = codes
    return apps_all
def get_apps_all_train_test(apps_all):
    """Split the merged frame back into train (TARGET known) and test rows.

    Returns:
        tuple: (train DataFrame with TARGET, test DataFrame without TARGET).
    """
    has_target = apps_all['TARGET'].notnull()
    train_df = apps_all[has_target]
    test_df = apps_all[~has_target].drop('TARGET', axis=1)
    return train_df, test_df
def train_apps_all(apps_all_train):
    """Fit an LGBMClassifier on a 70/30 split of the labelled rows.

    Drops the ID and label columns, splits with a fixed seed, and trains
    with AUC as the eval metric; training stops after 100 rounds without
    validation improvement and logs every 100 rounds.

    Returns:
        The fitted LGBMClassifier.
    """
    features = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    labels = apps_all_train['TARGET']
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    model_params = dict(
        nthread=4,
        n_estimators=2000,
        learning_rate=0.01,
        num_leaves=32,
        colsample_bytree=0.8,
        subsample=0.8,
        max_depth=8,
        reg_alpha=0.04,
        reg_lambda=0.07,
        min_child_weight=40,
        silent=-1,
        verbose=-1,
    )
    clf = LGBMClassifier(**model_params)
    clf.fit(
        train_x, train_y,
        eval_set=[(train_x, train_y), (valid_x, valid_y)],
        eval_metric='auc',
        verbose=100,
        early_stopping_rounds=100,
    )
    return clf
# Build the full feature matrix: application features + prev aggregates.
apps_all = get_apps_all_with_prev_agg(apps, prev)
# Label-encode the categorical (object-dtype) columns.
apps_all = get_apps_all_encoded(apps_all)
# Split back into labelled train rows and unlabelled test rows.
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
# Module-level train/valid split; reused by the Bayesian-optimization
# objective function (lgb_roc_eval) below.
ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
target_app = apps_all_train['TARGET']
train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
# Notebook output (kept for reference, commented out so the file stays valid):
# prev_agg shape: (338857, 41)
# apps_all before merge shape: (356255, 135)
# apps_all after merge with prev_agg shape: (356255, 176)
# bayesian optimization 패키지 설치
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization # 1.2.0
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier
# Search-space bounds for Bayesian optimization of the LightGBM
# hyperparameters; each entry is a (low, high) tuple sampled as floats.
bayesian_params = {
    'max_depth': (6, 16),
    'num_leaves': (24, 64),
    'min_child_samples': (10, 200),
    'min_child_weight': (1, 50),
    'subsample': (0.5, 1),  # going below 0.5 did not improve results much
    'colsample_bytree': (0.5, 1),
    'max_bin': (10, 500),
    'reg_lambda': (0.001, 10),
    'reg_alpha': (0.01, 50)
}
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample,
                 colsample_bytree, max_bin, reg_lambda, reg_alpha):
    """Objective for BayesianOptimization: validation ROC-AUC of an LGBM fit.

    The optimizer proposes every hyperparameter as a float, so integer
    parameters are rounded and fractions are clamped back into legal range.
    Relies on the module-level train_x/train_y/valid_x/valid_y split.

    Returns:
        float: ROC-AUC on the validation split (value to maximize).
    """
    params = {
        'n_estimators': 500,  # 1000 rounds makes the search too slow
        'learning_rate': 0.02,
        # integer-valued hyperparameters arrive as floats — round them
        'max_depth': int(round(max_depth)),
        'num_leaves': int(round(num_leaves)),
        'min_child_samples': int(round(min_child_samples)),
        'min_child_weight': int(round(min_child_weight)),
        # clamp into [0, 1] as a second guard on top of the search bounds
        'subsample': max(min(subsample, 1), 0),
        'colsample_bytree': max(min(colsample_bytree, 1), 0),
        'max_bin': max(int(round(max_bin)), 10),
        'reg_lambda': max(reg_lambda, 0),
        'reg_alpha': max(reg_alpha, 0)
    }
    model = LGBMClassifier(**params)
    model.fit(train_x, train_y,
              eval_set=[(train_x, train_y), (valid_x, valid_y)],
              eval_metric='auc', verbose=100, early_stopping_rounds=100)
    # Score positive-class probabilities on the hold-out split.
    valid_proba = model.predict_proba(valid_x)[:, 1]
    return roc_auc_score(valid_y, valid_proba)
# BayesianOptimization객체를 수행할 함수와 search할 parameter 범위를 설정하여 생성.
lgbBO = BayesianOptimization(f=lgb_roc_eval, pbounds=bayesian_params, random_state=0)
# 함수 반환값이 최대가 되는 입력값 유추를 위한 iteration 수행. (24m 3s)
lgbBO.maximize(init_points=5, n_iter=25)
| iter | target | colsam... | max_bin | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.769375 training's binary_logloss: 0.246057 valid_1's auc: 0.755179 valid_1's binary_logloss: 0.248986
[200] training's auc: 0.787681 training's binary_logloss: 0.238342 valid_1's auc: 0.766691 valid_1's binary_logloss: 0.244129
[300] training's auc: 0.799234 training's binary_logloss: 0.233876 valid_1's auc: 0.77183 valid_1's binary_logloss: 0.242237
[400] training's auc: 0.808068 training's binary_logloss: 0.230569 valid_1's auc: 0.774252 valid_1's binary_logloss: 0.241339
[500] training's auc: 0.816074 training's binary_logloss: 0.227608 valid_1's auc: 0.775833 valid_1's binary_logloss: 0.240803
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.816074 training's binary_logloss: 0.227608 valid_1's auc: 0.775833 valid_1's binary_logloss: 0.240803
| [0m 1 [0m | [0m 0.7758 [0m | [0m 0.7744 [0m | [0m 360.4 [0m | [0m 12.03 [0m | [0m 113.5 [0m | [0m 21.76 [0m | [0m 49.84 [0m | [0m 21.88 [0m | [0m 8.918 [0m | [0m 0.9818 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.762756 training's binary_logloss: 0.247406 valid_1's auc: 0.753947 valid_1's binary_logloss: 0.249065
[200] training's auc: 0.780297 training's binary_logloss: 0.240447 valid_1's auc: 0.765879 valid_1's binary_logloss: 0.244232
[300] training's auc: 0.790938 training's binary_logloss: 0.236473 valid_1's auc: 0.771249 valid_1's binary_logloss: 0.24227
[400] training's auc: 0.799034 training's binary_logloss: 0.23352 valid_1's auc: 0.774004 valid_1's binary_logloss: 0.241278
[500] training's auc: 0.805948 training's binary_logloss: 0.231055 valid_1's auc: 0.775677 valid_1's binary_logloss: 0.240701
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.805948 training's binary_logloss: 0.231055 valid_1's auc: 0.775677 valid_1's binary_logloss: 0.240701
| [0m 2 [0m | [0m 0.7757 [0m | [0m 0.6917 [0m | [0m 397.9 [0m | [0m 11.29 [0m | [0m 117.9 [0m | [0m 46.35 [0m | [0m 26.84 [0m | [0m 4.366 [0m | [0m 0.2032 [0m | [0m 0.9163 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.775744 training's binary_logloss: 0.24382 valid_1's auc: 0.757621 valid_1's binary_logloss: 0.247886
[200] training's auc: 0.797027 training's binary_logloss: 0.235008 valid_1's auc: 0.768961 valid_1's binary_logloss: 0.24316
[300] training's auc: 0.811885 training's binary_logloss: 0.229277 valid_1's auc: 0.773437 valid_1's binary_logloss: 0.24154
[400] training's auc: 0.824812 training's binary_logloss: 0.224474 valid_1's auc: 0.776204 valid_1's binary_logloss: 0.240563
[500] training's auc: 0.835466 training's binary_logloss: 0.220404 valid_1's auc: 0.776976 valid_1's binary_logloss: 0.24022
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.835466 training's binary_logloss: 0.220404 valid_1's auc: 0.776976 valid_1's binary_logloss: 0.24022
| [95m 3 [0m | [95m 0.777 [0m | [95m 0.8891 [0m | [95m 436.3 [0m | [95m 15.79 [0m | [95m 161.8 [0m | [95m 23.61 [0m | [95m 55.22 [0m | [95m 5.923 [0m | [95m 6.4 [0m | [95m 0.5717 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.765688 training's binary_logloss: 0.246918 valid_1's auc: 0.753479 valid_1's binary_logloss: 0.249319
[200] training's auc: 0.783005 training's binary_logloss: 0.239795 valid_1's auc: 0.765172 valid_1's binary_logloss: 0.244577
[300] training's auc: 0.793616 training's binary_logloss: 0.23576 valid_1's auc: 0.770608 valid_1's binary_logloss: 0.242612
[400] training's auc: 0.801659 training's binary_logloss: 0.232783 valid_1's auc: 0.773426 valid_1's binary_logloss: 0.241615
[500] training's auc: 0.808285 training's binary_logloss: 0.230323 valid_1's auc: 0.774921 valid_1's binary_logloss: 0.241083
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.808285 training's binary_logloss: 0.230323 valid_1's auc: 0.774921 valid_1's binary_logloss: 0.241083
| [0m 4 [0m | [0m 0.7749 [0m | [0m 0.9723 [0m | [0m 265.7 [0m | [0m 10.15 [0m | [0m 60.27 [0m | [0m 38.94 [0m | [0m 42.25 [0m | [0m 28.43 [0m | [0m 0.1889 [0m | [0m 0.8088 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.765595 training's binary_logloss: 0.247278 valid_1's auc: 0.75368 valid_1's binary_logloss: 0.249609
[200] training's auc: 0.781691 training's binary_logloss: 0.240311 valid_1's auc: 0.764539 valid_1's binary_logloss: 0.244863
[300] training's auc: 0.791658 training's binary_logloss: 0.236498 valid_1's auc: 0.769765 valid_1's binary_logloss: 0.242951
[400] training's auc: 0.799097 training's binary_logloss: 0.233748 valid_1's auc: 0.772393 valid_1's binary_logloss: 0.242026
[500] training's auc: 0.805557 training's binary_logloss: 0.231406 valid_1's auc: 0.774123 valid_1's binary_logloss: 0.241423
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.805557 training's binary_logloss: 0.231406 valid_1's auc: 0.774123 valid_1's binary_logloss: 0.241423
| [0m 5 [0m | [0m 0.7741 [0m | [0m 0.806 [0m | [0m 312.3 [0m | [0m 15.44 [0m | [0m 139.5 [0m | [0m 18.62 [0m | [0m 41.48 [0m | [0m 34.88 [0m | [0m 0.6032 [0m | [0m 0.8334 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.778657 training's binary_logloss: 0.243805 valid_1's auc: 0.759224 valid_1's binary_logloss: 0.248021
[200] training's auc: 0.797782 training's binary_logloss: 0.234994 valid_1's auc: 0.768746 valid_1's binary_logloss: 0.243256
[300] training's auc: 0.812213 training's binary_logloss: 0.229331 valid_1's auc: 0.773293 valid_1's binary_logloss: 0.241553
[400] training's auc: 0.824605 training's binary_logloss: 0.224605 valid_1's auc: 0.775881 valid_1's binary_logloss: 0.240608
[500] training's auc: 0.835444 training's binary_logloss: 0.220508 valid_1's auc: 0.777366 valid_1's binary_logloss: 0.240098
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.835444 training's binary_logloss: 0.220508 valid_1's auc: 0.777366 valid_1's binary_logloss: 0.240098
| [95m 6 [0m | [95m 0.7774 [0m | [95m 0.6405 [0m | [95m 435.0 [0m | [95m 13.5 [0m | [95m 169.3 [0m | [95m 26.92 [0m | [95m 57.69 [0m | [95m 5.768 [0m | [95m 9.196 [0m | [95m 0.613 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.766035 training's binary_logloss: 0.247565 valid_1's auc: 0.754549 valid_1's binary_logloss: 0.249569
[200] training's auc: 0.780739 training's binary_logloss: 0.240964 valid_1's auc: 0.764736 valid_1's binary_logloss: 0.244872
[300] training's auc: 0.789504 training's binary_logloss: 0.237458 valid_1's auc: 0.769758 valid_1's binary_logloss: 0.24294
[400] training's auc: 0.795707 training's binary_logloss: 0.235043 valid_1's auc: 0.772359 valid_1's binary_logloss: 0.241973
[500] training's auc: 0.800676 training's binary_logloss: 0.23313 valid_1's auc: 0.77394 valid_1's binary_logloss: 0.241372
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.800676 training's binary_logloss: 0.23313 valid_1's auc: 0.77394 valid_1's binary_logloss: 0.241372
| [0m 7 [0m | [0m 0.7739 [0m | [0m 0.7422 [0m | [0m 428.5 [0m | [0m 6.316 [0m | [0m 194.0 [0m | [0m 44.28 [0m | [0m 50.76 [0m | [0m 26.01 [0m | [0m 5.85 [0m | [0m 0.9474 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.779486 training's binary_logloss: 0.244217 valid_1's auc: 0.760727 valid_1's binary_logloss: 0.248149
[200] training's auc: 0.796419 training's binary_logloss: 0.235656 valid_1's auc: 0.768793 valid_1's binary_logloss: 0.243432
[300] training's auc: 0.810285 training's binary_logloss: 0.230168 valid_1's auc: 0.77343 valid_1's binary_logloss: 0.241651
[400] training's auc: 0.821917 training's binary_logloss: 0.225749 valid_1's auc: 0.775774 valid_1's binary_logloss: 0.240811
[500] training's auc: 0.832404 training's binary_logloss: 0.221754 valid_1's auc: 0.777248 valid_1's binary_logloss: 0.240267
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.832404 training's binary_logloss: 0.221754 valid_1's auc: 0.777248 valid_1's binary_logloss: 0.240267
| [0m 8 [0m | [0m 0.7772 [0m | [0m 0.5334 [0m | [0m 449.0 [0m | [0m 12.21 [0m | [0m 174.8 [0m | [0m 29.78 [0m | [0m 62.2 [0m | [0m 9.646 [0m | [0m 8.556 [0m | [0m 0.716 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.778361 training's binary_logloss: 0.244066 valid_1's auc: 0.759265 valid_1's binary_logloss: 0.24797
[200] training's auc: 0.796634 training's binary_logloss: 0.235804 valid_1's auc: 0.768636 valid_1's binary_logloss: 0.243387
[300] training's auc: 0.808036 training's binary_logloss: 0.231091 valid_1's auc: 0.772468 valid_1's binary_logloss: 0.241819
[400] training's auc: 0.816655 training's binary_logloss: 0.227676 valid_1's auc: 0.774757 valid_1's binary_logloss: 0.24098
[500] training's auc: 0.824171 training's binary_logloss: 0.224775 valid_1's auc: 0.775976 valid_1's binary_logloss: 0.240529
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.824171 training's binary_logloss: 0.224775 valid_1's auc: 0.775976 valid_1's binary_logloss: 0.240529
| [0m 9 [0m | [0m 0.776 [0m | [0m 0.709 [0m | [0m 436.9 [0m | [0m 7.36 [0m | [0m 154.5 [0m | [0m 41.19 [0m | [0m 61.92 [0m | [0m 5.192 [0m | [0m 8.185 [0m | [0m 0.6971 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.776197 training's binary_logloss: 0.244176 valid_1's auc: 0.757329 valid_1's binary_logloss: 0.248272
[200] training's auc: 0.795529 training's binary_logloss: 0.235796 valid_1's auc: 0.767715 valid_1's binary_logloss: 0.243633
[300] training's auc: 0.808846 training's binary_logloss: 0.230566 valid_1's auc: 0.772539 valid_1's binary_logloss: 0.241868
[400] training's auc: 0.819773 training's binary_logloss: 0.226471 valid_1's auc: 0.774818 valid_1's binary_logloss: 0.241076
[500] training's auc: 0.829609 training's binary_logloss: 0.222731 valid_1's auc: 0.776241 valid_1's binary_logloss: 0.240578
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.829609 training's binary_logloss: 0.222731 valid_1's auc: 0.776241 valid_1's binary_logloss: 0.240578
| [0m 10 [0m | [0m 0.7762 [0m | [0m 0.8272 [0m | [0m 454.0 [0m | [0m 8.843 [0m | [0m 172.3 [0m | [0m 7.795 [0m | [0m 62.48 [0m | [0m 12.96 [0m | [0m 9.245 [0m | [0m 0.7441 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.781786 training's binary_logloss: 0.243117 valid_1's auc: 0.761 valid_1's binary_logloss: 0.247638
[200] training's auc: 0.800144 training's binary_logloss: 0.234309 valid_1's auc: 0.769258 valid_1's binary_logloss: 0.243081
[300] training's auc: 0.814131 training's binary_logloss: 0.228737 valid_1's auc: 0.773378 valid_1's binary_logloss: 0.241467
[400] training's auc: 0.824815 training's binary_logloss: 0.224639 valid_1's auc: 0.775246 valid_1's binary_logloss: 0.240755
[500] training's auc: 0.834881 training's binary_logloss: 0.220776 valid_1's auc: 0.776495 valid_1's binary_logloss: 0.240307
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.834881 training's binary_logloss: 0.220776 valid_1's auc: 0.776495 valid_1's binary_logloss: 0.240307
| [0m 11 [0m | [0m 0.7765 [0m | [0m 0.5893 [0m | [0m 454.3 [0m | [0m 8.131 [0m | [0m 177.8 [0m | [0m 27.34 [0m | [0m 60.68 [0m | [0m 4.726 [0m | [0m 1.43 [0m | [0m 0.8557 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.779788 training's binary_logloss: 0.243626 valid_1's auc: 0.761093 valid_1's binary_logloss: 0.247734
[200] training's auc: 0.798918 training's binary_logloss: 0.23461 valid_1's auc: 0.769601 valid_1's binary_logloss: 0.243006
[300] training's auc: 0.814191 training's binary_logloss: 0.228701 valid_1's auc: 0.773467 valid_1's binary_logloss: 0.241427
[400] training's auc: 0.82738 training's binary_logloss: 0.223755 valid_1's auc: 0.775969 valid_1's binary_logloss: 0.240535
[500] training's auc: 0.838862 training's binary_logloss: 0.219465 valid_1's auc: 0.777154 valid_1's binary_logloss: 0.240107
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.838862 training's binary_logloss: 0.219465 valid_1's auc: 0.777154 valid_1's binary_logloss: 0.240107
| [0m 12 [0m | [0m 0.7772 [0m | [0m 0.5053 [0m | [0m 415.8 [0m | [0m 14.28 [0m | [0m 173.3 [0m | [0m 14.82 [0m | [0m 54.13 [0m | [0m 1.876 [0m | [0m 3.377 [0m | [0m 0.9288 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.780625 training's binary_logloss: 0.242042 valid_1's auc: 0.75829 valid_1's binary_logloss: 0.24744
[200] training's auc: 0.804935 training's binary_logloss: 0.232207 valid_1's auc: 0.769608 valid_1's binary_logloss: 0.24281
[300] training's auc: 0.822925 training's binary_logloss: 0.225391 valid_1's auc: 0.774052 valid_1's binary_logloss: 0.241209
[400] training's auc: 0.838204 training's binary_logloss: 0.219677 valid_1's auc: 0.77614 valid_1's binary_logloss: 0.240494
[500] training's auc: 0.850876 training's binary_logloss: 0.214759 valid_1's auc: 0.776933 valid_1's binary_logloss: 0.240206
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.850876 training's binary_logloss: 0.214759 valid_1's auc: 0.776933 valid_1's binary_logloss: 0.240206
| [0m 13 [0m | [0m 0.7769 [0m | [0m 0.9517 [0m | [0m 404.2 [0m | [0m 13.15 [0m | [0m 169.7 [0m | [0m 11.36 [0m | [0m 60.67 [0m | [0m 2.685 [0m | [0m 1.096 [0m | [0m 0.5336 [0m |
[LightGBM] [Warning] Accuracy may be bad since you didn't explicitly set num_leaves OR 2^max_depth > num_leaves. (num_leaves=31).
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.761664 training's binary_logloss: 0.247856 valid_1's auc: 0.752443 valid_1's binary_logloss: 0.249556
[200] training's auc: 0.778738 training's binary_logloss: 0.241078 valid_1's auc: 0.764543 valid_1's binary_logloss: 0.244749
[300] training's auc: 0.788493 training's binary_logloss: 0.23738 valid_1's auc: 0.770193 valid_1's binary_logloss: 0.242752
[400] training's auc: 0.795566 training's binary_logloss: 0.234773 valid_1's auc: 0.772731 valid_1's binary_logloss: 0.241837
[500] training's auc: 0.801519 training's binary_logloss: 0.232602 valid_1's auc: 0.774401 valid_1's binary_logloss: 0.241236
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.801519 training's binary_logloss: 0.232602 valid_1's auc: 0.774401 valid_1's binary_logloss: 0.241236
| [0m 14 [0m | [0m 0.7744 [0m | [0m 0.9822 [0m | [0m 402.2 [0m | [0m 7.176 [0m | [0m 156.8 [0m | [0m 1.974 [0m | [0m 31.25 [0m | [0m 18.99 [0m | [0m 9.559 [0m | [0m 0.8367 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.781305 training's binary_logloss: 0.24224 valid_1's auc: 0.759845 valid_1's binary_logloss: 0.247339
[200] training's auc: 0.804132 training's binary_logloss: 0.232571 valid_1's auc: 0.7702 valid_1's binary_logloss: 0.242716
[300] training's auc: 0.82056 training's binary_logloss: 0.22617 valid_1's auc: 0.773939 valid_1's binary_logloss: 0.241311
[400] training's auc: 0.834494 training's binary_logloss: 0.220836 valid_1's auc: 0.776291 valid_1's binary_logloss: 0.240484
[500] training's auc: 0.846112 training's binary_logloss: 0.216345 valid_1's auc: 0.777442 valid_1's binary_logloss: 0.240088
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.846112 training's binary_logloss: 0.216345 valid_1's auc: 0.777442 valid_1's binary_logloss: 0.240088
| [95m 15 [0m | [95m 0.7774 [0m | [95m 0.7871 [0m | [95m 405.1 [0m | [95m 10.03 [0m | [95m 165.2 [0m | [95m 21.33 [0m | [95m 61.87 [0m | [95m 2.02 [0m | [95m 2.977 [0m | [95m 0.9291 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.782211 training's binary_logloss: 0.24224 valid_1's auc: 0.759948 valid_1's binary_logloss: 0.247453
[200] training's auc: 0.805055 training's binary_logloss: 0.232465 valid_1's auc: 0.770092 valid_1's binary_logloss: 0.242782
[300] training's auc: 0.822634 training's binary_logloss: 0.225782 valid_1's auc: 0.774303 valid_1's binary_logloss: 0.241222
[400] training's auc: 0.837137 training's binary_logloss: 0.220207 valid_1's auc: 0.776537 valid_1's binary_logloss: 0.240432
[500] training's auc: 0.849431 training's binary_logloss: 0.215529 valid_1's auc: 0.777267 valid_1's binary_logloss: 0.240164
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.849431 training's binary_logloss: 0.215529 valid_1's auc: 0.777267 valid_1's binary_logloss: 0.240164
| [0m 16 [0m | [0m 0.7773 [0m | [0m 0.7169 [0m | [0m 401.3 [0m | [0m 14.48 [0m | [0m 168.2 [0m | [0m 9.4 [0m | [0m 62.02 [0m | [0m 1.055 [0m | [0m 5.135 [0m | [0m 0.5538 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.783864 training's binary_logloss: 0.242193 valid_1's auc: 0.76167 valid_1's binary_logloss: 0.247351
[200] training's auc: 0.804527 training's binary_logloss: 0.232573 valid_1's auc: 0.770292 valid_1's binary_logloss: 0.242711
[300] training's auc: 0.82135 training's binary_logloss: 0.226009 valid_1's auc: 0.77442 valid_1's binary_logloss: 0.241114
[400] training's auc: 0.836029 training's binary_logloss: 0.220387 valid_1's auc: 0.776879 valid_1's binary_logloss: 0.240243
[500] training's auc: 0.848685 training's binary_logloss: 0.215547 valid_1's auc: 0.777961 valid_1's binary_logloss: 0.239883
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.848685 training's binary_logloss: 0.215547 valid_1's auc: 0.777961 valid_1's binary_logloss: 0.239883
| [95m 17 [0m | [95m 0.778 [0m | [95m 0.5824 [0m | [95m 371.2 [0m | [95m 15.06 [0m | [95m 168.3 [0m | [95m 23.89 [0m | [95m 63.79 [0m | [95m 3.633 [0m | [95m 0.3227 [0m | [95m 0.6384 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.774397 training's binary_logloss: 0.244187 valid_1's auc: 0.757294 valid_1's binary_logloss: 0.247932
[200] training's auc: 0.795365 training's binary_logloss: 0.23552 valid_1's auc: 0.768842 valid_1's binary_logloss: 0.243171
[300] training's auc: 0.809889 training's binary_logloss: 0.230002 valid_1's auc: 0.773203 valid_1's binary_logloss: 0.241577
[400] training's auc: 0.822257 training's binary_logloss: 0.225356 valid_1's auc: 0.775991 valid_1's binary_logloss: 0.240605
[500] training's auc: 0.832652 training's binary_logloss: 0.221477 valid_1's auc: 0.777198 valid_1's binary_logloss: 0.240181
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.832652 training's binary_logloss: 0.221477 valid_1's auc: 0.777198 valid_1's binary_logloss: 0.240181
| [0m 18 [0m | [0m 0.7772 [0m | [0m 0.8981 [0m | [0m 375.2 [0m | [0m 14.29 [0m | [0m 163.0 [0m | [0m 46.3 [0m | [0m 50.27 [0m | [0m 2.12 [0m | [0m 7.75 [0m | [0m 0.5721 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.776712 training's binary_logloss: 0.243681 valid_1's auc: 0.758389 valid_1's binary_logloss: 0.247737
[200] training's auc: 0.79801 training's binary_logloss: 0.234737 valid_1's auc: 0.768899 valid_1's binary_logloss: 0.243104
[300] training's auc: 0.813364 training's binary_logloss: 0.22888 valid_1's auc: 0.773103 valid_1's binary_logloss: 0.241565
[400] training's auc: 0.826068 training's binary_logloss: 0.224065 valid_1's auc: 0.775823 valid_1's binary_logloss: 0.240631
[500] training's auc: 0.836805 training's binary_logloss: 0.220032 valid_1's auc: 0.776766 valid_1's binary_logloss: 0.240291
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.836805 training's binary_logloss: 0.220032 valid_1's auc: 0.776766 valid_1's binary_logloss: 0.240291
| [0m 19 [0m | [0m 0.7768 [0m | [0m 0.8035 [0m | [0m 369.6 [0m | [0m 15.2 [0m | [0m 190.2 [0m | [0m 31.27 [0m | [0m 52.61 [0m | [0m 0.8187 [0m | [0m 9.05 [0m | [0m 0.7846 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.775143 training's binary_logloss: 0.244871 valid_1's auc: 0.757308 valid_1's binary_logloss: 0.24858
[200] training's auc: 0.793133 training's binary_logloss: 0.236696 valid_1's auc: 0.767305 valid_1's binary_logloss: 0.24385
[300] training's auc: 0.805686 training's binary_logloss: 0.231768 valid_1's auc: 0.771941 valid_1's binary_logloss: 0.242104
[400] training's auc: 0.815912 training's binary_logloss: 0.22789 valid_1's auc: 0.774142 valid_1's binary_logloss: 0.24129
[500] training's auc: 0.825252 training's binary_logloss: 0.224378 valid_1's auc: 0.775666 valid_1's binary_logloss: 0.240743
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.825252 training's binary_logloss: 0.224378 valid_1's auc: 0.775666 valid_1's binary_logloss: 0.240743
| [0m 20 [0m | [0m 0.7757 [0m | [0m 0.7313 [0m | [0m 368.5 [0m | [0m 14.43 [0m | [0m 160.4 [0m | [0m 38.38 [0m | [0m 63.29 [0m | [0m 21.19 [0m | [0m 2.212 [0m | [0m 0.6823 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.779656 training's binary_logloss: 0.242874 valid_1's auc: 0.75918 valid_1's binary_logloss: 0.247615
[200] training's auc: 0.801697 training's binary_logloss: 0.233473 valid_1's auc: 0.769144 valid_1's binary_logloss: 0.243028
[300] training's auc: 0.818 training's binary_logloss: 0.227175 valid_1's auc: 0.773828 valid_1's binary_logloss: 0.241325
[400] training's auc: 0.831466 training's binary_logloss: 0.222071 valid_1's auc: 0.776094 valid_1's binary_logloss: 0.240513
[500] training's auc: 0.842826 training's binary_logloss: 0.217744 valid_1's auc: 0.777063 valid_1's binary_logloss: 0.24015
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.842826 training's binary_logloss: 0.217744 valid_1's auc: 0.777063 valid_1's binary_logloss: 0.24015
| [0m 21 [0m | [0m 0.7771 [0m | [0m 0.7668 [0m | [0m 376.6 [0m | [0m 10.55 [0m | [0m 159.1 [0m | [0m 12.95 [0m | [0m 58.19 [0m | [0m 1.11 [0m | [0m 6.881 [0m | [0m 0.9496 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.77615 training's binary_logloss: 0.243649 valid_1's auc: 0.756564 valid_1's binary_logloss: 0.248151
[200] training's auc: 0.797533 training's binary_logloss: 0.234817 valid_1's auc: 0.768033 valid_1's binary_logloss: 0.243493
[300] training's auc: 0.812703 training's binary_logloss: 0.229025 valid_1's auc: 0.772842 valid_1's binary_logloss: 0.241797
[400] training's auc: 0.825473 training's binary_logloss: 0.224268 valid_1's auc: 0.775308 valid_1's binary_logloss: 0.240927
[500] training's auc: 0.836724 training's binary_logloss: 0.220024 valid_1's auc: 0.77686 valid_1's binary_logloss: 0.240394
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.836724 training's binary_logloss: 0.220024 valid_1's auc: 0.77686 valid_1's binary_logloss: 0.240394
| [0m 22 [0m | [0m 0.7769 [0m | [0m 0.9772 [0m | [0m 346.7 [0m | [0m 13.88 [0m | [0m 181.4 [0m | [0m 4.808 [0m | [0m 62.99 [0m | [0m 11.53 [0m | [0m 3.84 [0m | [0m 0.9113 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.777335 training's binary_logloss: 0.243067 valid_1's auc: 0.757873 valid_1's binary_logloss: 0.247564
[200] training's auc: 0.799929 training's binary_logloss: 0.233796 valid_1's auc: 0.769193 valid_1's binary_logloss: 0.242922
[300] training's auc: 0.815816 training's binary_logloss: 0.227695 valid_1's auc: 0.773629 valid_1's binary_logloss: 0.241334
[400] training's auc: 0.828929 training's binary_logloss: 0.222738 valid_1's auc: 0.775752 valid_1's binary_logloss: 0.24056
[500] training's auc: 0.840077 training's binary_logloss: 0.218474 valid_1's auc: 0.776472 valid_1's binary_logloss: 0.240302
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.840077 training's binary_logloss: 0.218474 valid_1's auc: 0.776472 valid_1's binary_logloss: 0.240302
| [0m 23 [0m | [0m 0.7765 [0m | [0m 0.9556 [0m | [0m 395.9 [0m | [0m 12.36 [0m | [0m 166.4 [0m | [0m 43.55 [0m | [0m 57.36 [0m | [0m 2.284 [0m | [0m 3.768 [0m | [0m 0.9566 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.782723 training's binary_logloss: 0.242071 valid_1's auc: 0.760665 valid_1's binary_logloss: 0.247344
[200] training's auc: 0.805964 training's binary_logloss: 0.232194 valid_1's auc: 0.770315 valid_1's binary_logloss: 0.242684
[300] training's auc: 0.823837 training's binary_logloss: 0.225434 valid_1's auc: 0.774051 valid_1's binary_logloss: 0.241213
[400] training's auc: 0.839027 training's binary_logloss: 0.219772 valid_1's auc: 0.776443 valid_1's binary_logloss: 0.240397
[500] training's auc: 0.852246 training's binary_logloss: 0.214837 valid_1's auc: 0.777449 valid_1's binary_logloss: 0.240067
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.852246 training's binary_logloss: 0.214837 valid_1's auc: 0.777449 valid_1's binary_logloss: 0.240067
| [0m 24 [0m | [0m 0.7774 [0m | [0m 0.6478 [0m | [0m 367.7 [0m | [0m 11.73 [0m | [0m 186.2 [0m | [0m 3.654 [0m | [0m 60.46 [0m | [0m 1.467 [0m | [0m 0.2006 [0m | [0m 0.9172 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.781805 training's binary_logloss: 0.242163 valid_1's auc: 0.760247 valid_1's binary_logloss: 0.24732
[200] training's auc: 0.804572 training's binary_logloss: 0.23237 valid_1's auc: 0.770314 valid_1's binary_logloss: 0.242677
[300] training's auc: 0.821572 training's binary_logloss: 0.225842 valid_1's auc: 0.774201 valid_1's binary_logloss: 0.241208
[400] training's auc: 0.835996 training's binary_logloss: 0.220404 valid_1's auc: 0.77626 valid_1's binary_logloss: 0.240527
[500] training's auc: 0.84808 training's binary_logloss: 0.2157 valid_1's auc: 0.776885 valid_1's binary_logloss: 0.240276
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.84808 training's binary_logloss: 0.2157 valid_1's auc: 0.776885 valid_1's binary_logloss: 0.240276
| [0m 25 [0m | [0m 0.7769 [0m | [0m 0.7182 [0m | [0m 339.7 [0m | [0m 13.76 [0m | [0m 167.5 [0m | [0m 32.55 [0m | [0m 61.99 [0m | [0m 0.4836 [0m | [0m 3.174 [0m | [0m 0.6534 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.769685 training's binary_logloss: 0.247401 valid_1's auc: 0.757291 valid_1's binary_logloss: 0.249538
[200] training's auc: 0.783386 training's binary_logloss: 0.240356 valid_1's auc: 0.76498 valid_1's binary_logloss: 0.244993
[300] training's auc: 0.7936 training's binary_logloss: 0.236349 valid_1's auc: 0.769056 valid_1's binary_logloss: 0.243352
[400] training's auc: 0.801776 training's binary_logloss: 0.233311 valid_1's auc: 0.771078 valid_1's binary_logloss: 0.242581
[500] training's auc: 0.808916 training's binary_logloss: 0.230676 valid_1's auc: 0.772438 valid_1's binary_logloss: 0.242114
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.808916 training's binary_logloss: 0.230676 valid_1's auc: 0.772438 valid_1's binary_logloss: 0.242114
| [0m 26 [0m | [0m 0.7724 [0m | [0m 0.5199 [0m | [0m 10.01 [0m | [0m 14.57 [0m | [0m 198.8 [0m | [0m 38.84 [0m | [0m 63.26 [0m | [0m 18.88 [0m | [0m 3.318 [0m | [0m 0.6235 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.757162 training's binary_logloss: 0.249588 valid_1's auc: 0.750677 valid_1's binary_logloss: 0.25056
[200] training's auc: 0.77122 training's binary_logloss: 0.243617 valid_1's auc: 0.761312 valid_1's binary_logloss: 0.24592
[300] training's auc: 0.779719 training's binary_logloss: 0.240453 valid_1's auc: 0.767013 valid_1's binary_logloss: 0.243882
[400] training's auc: 0.785638 training's binary_logloss: 0.23835 valid_1's auc: 0.77012 valid_1's binary_logloss: 0.242793
[500] training's auc: 0.79 training's binary_logloss: 0.236784 valid_1's auc: 0.771919 valid_1's binary_logloss: 0.242159
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.79 training's binary_logloss: 0.236784 valid_1's auc: 0.771919 valid_1's binary_logloss: 0.242159
| [0m 27 [0m | [0m 0.7719 [0m | [0m 0.8943 [0m | [0m 71.76 [0m | [0m 7.739 [0m | [0m 10.96 [0m | [0m 22.24 [0m | [0m 25.94 [0m | [0m 46.3 [0m | [0m 4.912 [0m | [0m 0.7233 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.767905 training's binary_logloss: 0.247137 valid_1's auc: 0.753404 valid_1's binary_logloss: 0.249821
[200] training's auc: 0.783995 training's binary_logloss: 0.239932 valid_1's auc: 0.763746 valid_1's binary_logloss: 0.245166
[300] training's auc: 0.794479 training's binary_logloss: 0.235865 valid_1's auc: 0.768935 valid_1's binary_logloss: 0.243283
[400] training's auc: 0.802465 training's binary_logloss: 0.232906 valid_1's auc: 0.771801 valid_1's binary_logloss: 0.242311
[500] training's auc: 0.809264 training's binary_logloss: 0.230405 valid_1's auc: 0.773466 valid_1's binary_logloss: 0.241735
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.809264 training's binary_logloss: 0.230405 valid_1's auc: 0.773466 valid_1's binary_logloss: 0.241735
| [0m 28 [0m | [0m 0.7735 [0m | [0m 0.8024 [0m | [0m 498.8 [0m | [0m 13.15 [0m | [0m 11.37 [0m | [0m 43.17 [0m | [0m 59.01 [0m | [0m 48.69 [0m | [0m 6.392 [0m | [0m 0.6109 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.761464 training's binary_logloss: 0.247636 valid_1's auc: 0.753328 valid_1's binary_logloss: 0.249126
[200] training's auc: 0.779486 training's binary_logloss: 0.240682 valid_1's auc: 0.765682 valid_1's binary_logloss: 0.244307
[300] training's auc: 0.790491 training's binary_logloss: 0.236669 valid_1's auc: 0.771108 valid_1's binary_logloss: 0.242385
[400] training's auc: 0.798809 training's binary_logloss: 0.233708 valid_1's auc: 0.77386 valid_1's binary_logloss: 0.241398
[500] training's auc: 0.805537 training's binary_logloss: 0.231303 valid_1's auc: 0.775389 valid_1's binary_logloss: 0.240869
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.805537 training's binary_logloss: 0.231303 valid_1's auc: 0.775389 valid_1's binary_logloss: 0.240869
| [0m 29 [0m | [0m 0.7754 [0m | [0m 0.857 [0m | [0m 183.0 [0m | [0m 15.88 [0m | [0m 199.4 [0m | [0m 46.09 [0m | [0m 25.53 [0m | [0m 0.4676 [0m | [0m 4.877 [0m | [0m 0.7367 [0m |
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.779024 training's binary_logloss: 0.243553 valid_1's auc: 0.759806 valid_1's binary_logloss: 0.247839
[200] training's auc: 0.798203 training's binary_logloss: 0.234756 valid_1's auc: 0.769321 valid_1's binary_logloss: 0.243067
[300] training's auc: 0.813191 training's binary_logloss: 0.228992 valid_1's auc: 0.77358 valid_1's binary_logloss: 0.241398
[400] training's auc: 0.825551 training's binary_logloss: 0.224261 valid_1's auc: 0.776089 valid_1's binary_logloss: 0.240516
[500] training's auc: 0.836676 training's binary_logloss: 0.220061 valid_1's auc: 0.777719 valid_1's binary_logloss: 0.239922
Did not meet early stopping. Best iteration is:
[500] training's auc: 0.836676 training's binary_logloss: 0.220061 valid_1's auc: 0.777719 valid_1's binary_logloss: 0.239922
| [0m 30 [0m | [0m 0.7777 [0m | [0m 0.5717 [0m | [0m 375.7 [0m | [0m 9.301 [0m | [0m 21.45 [0m | [0m 1.047 [0m | [0m 56.95 [0m | [0m 5.258 [0m | [0m 1.837 [0m | [0m 0.5756 [0m |
=====================================================================================================================================
# The BayesianOptimization object's `res` attribute keeps, for every iteration,
# the objective's return value ('target') and the parameters that produced it.
lgbBO.res
[{'target': 0.7758329290960616,
'params': {'colsample_bytree': 0.7744067519636624,
'max_bin': 360.44278952248555,
'max_depth': 12.027633760716439,
'min_child_samples': 113.52780476941041,
'min_child_weight': 21.75908516760633,
'num_leaves': 49.835764522666246,
'reg_alpha': 21.884984691022,
'reg_lambda': 8.917838234820016,
'subsample': 0.9818313802505146}},
{'target': 0.7756769340648957,
'params': {'colsample_bytree': 0.6917207594128889,
'max_bin': 397.94526866050563,
'max_depth': 11.288949197529044,
'min_child_samples': 117.92846660784714,
'min_child_weight': 46.35423527634039,
'num_leaves': 26.841442327915477,
'reg_alpha': 4.36559369208002,
'reg_lambda': 0.20316375600581688,
'subsample': 0.916309922773969}},
{'target': 0.776976163031267,
'params': {'colsample_bytree': 0.8890783754749252,
'max_bin': 436.30595264094137,
'max_depth': 15.78618342232764,
'min_child_samples': 161.8401272011775,
'min_child_weight': 23.61248875039366,
'num_leaves': 55.22116705145822,
'reg_alpha': 5.922538549187972,
'reg_lambda': 6.3995702922539115,
'subsample': 0.5716766437045232}},
{'target': 0.7749205409939958,
'params': {'colsample_bytree': 0.972334458524792,
'max_bin': 265.70567765753515,
'max_depth': 10.146619399905235,
'min_child_samples': 60.265566299879126,
'min_child_weight': 38.93745078227661,
'num_leaves': 42.24601328866194,
'reg_alpha': 28.426013103943742,
'reg_lambda': 0.18887921456311507,
'subsample': 0.8088177485379385}},
{'target': 0.7741227529929269,
'params': {'colsample_bytree': 0.8060478613612108,
'max_bin': 312.2976584686309,
'max_depth': 15.437480785146242,
'min_child_samples': 139.54585682966186,
'min_child_weight': 18.615887128115514,
'num_leaves': 41.481278151973655,
'reg_alpha': 34.88458348440397,
'reg_lambda': 0.6031944908210691,
'subsample': 0.8333833577228338}},
{'target': 0.7773664953021925,
'params': {'colsample_bytree': 0.640481830861817,
'max_bin': 435.0379450370509,
'max_depth': 13.497244758196743,
'min_child_samples': 169.30259380663517,
'min_child_weight': 26.924368410534857,
'num_leaves': 57.69153705029583,
'reg_alpha': 5.7675960060342195,
'reg_lambda': 9.196441703351635,
'subsample': 0.6129607288812317}},
{'target': 0.7739399036759036,
'params': {'colsample_bytree': 0.7422042468655807,
'max_bin': 428.5493261036247,
'max_depth': 6.315514435919972,
'min_child_samples': 194.03208286885086,
'min_child_weight': 44.28076387569238,
'num_leaves': 50.75815245101985,
'reg_alpha': 26.00785767987684,
'reg_lambda': 5.84993578502173,
'subsample': 0.9474227457443503}},
{'target': 0.7772482274769529,
'params': {'colsample_bytree': 0.5333893019492069,
'max_bin': 449.0387151171313,
'max_depth': 12.212922607023643,
'min_child_samples': 174.84888599731988,
'min_child_weight': 29.7836599504897,
'num_leaves': 62.20165943944433,
'reg_alpha': 9.645626544452428,
'reg_lambda': 8.556212406693492,
'subsample': 0.7159695007014182}},
{'target': 0.7759758734462505,
'params': {'colsample_bytree': 0.7089988759927754,
'max_bin': 436.85365598017887,
'max_depth': 7.359631240798348,
'min_child_samples': 154.5385686463433,
'min_child_weight': 41.19294583040448,
'num_leaves': 61.92388746629491,
'reg_alpha': 5.192125778690125,
'reg_lambda': 8.185415661160636,
'subsample': 0.6971456156633256}},
{'target': 0.7762406739235685,
'params': {'colsample_bytree': 0.8272321630248705,
'max_bin': 453.9911503476196,
'max_depth': 8.843259651649694,
'min_child_samples': 172.30104987269675,
'min_child_weight': 7.794567008014467,
'num_leaves': 62.47625442010728,
'reg_alpha': 12.961703208570627,
'reg_lambda': 9.245360556729961,
'subsample': 0.7440701179773979}},
{'target': 0.7764952479787675,
'params': {'colsample_bytree': 0.5893094622743,
'max_bin': 454.25925743286984,
'max_depth': 8.130813897590308,
'min_child_samples': 177.81673100376875,
'min_child_weight': 27.340136372084796,
'num_leaves': 60.68177908320981,
'reg_alpha': 4.7264905489443665,
'reg_lambda': 1.4296350748227027,
'subsample': 0.8556525461153135}},
{'target': 0.7771535306175148,
'params': {'colsample_bytree': 0.5053422801156844,
'max_bin': 415.7599122539792,
'max_depth': 14.280675319405542,
'min_child_samples': 173.30396119532506,
'min_child_weight': 14.822481601041163,
'num_leaves': 54.129410624131786,
'reg_alpha': 1.8761533482688952,
'reg_lambda': 3.376708317045844,
'subsample': 0.9287789856743024}},
{'target': 0.7769328889682356,
'params': {'colsample_bytree': 0.9517481615795167,
'max_bin': 404.2477687077706,
'max_depth': 13.148965978396912,
'min_child_samples': 169.69849449576012,
'min_child_weight': 11.36388815948627,
'num_leaves': 60.66794952296474,
'reg_alpha': 2.6851476344067766,
'reg_lambda': 1.09593657785921,
'subsample': 0.5336474737485655}},
{'target': 0.7744011918766316,
'params': {'colsample_bytree': 0.9822387316159654,
'max_bin': 402.15668708369657,
'max_depth': 7.175625383553321,
'min_child_samples': 156.79778867788764,
'min_child_weight': 1.9743669564334434,
'num_leaves': 31.252474422161967,
'reg_alpha': 18.99065827078324,
'reg_lambda': 9.559304444136552,
'subsample': 0.8367140366287154}},
{'target': 0.7774422118078091,
'params': {'colsample_bytree': 0.7870918673886453,
'max_bin': 405.12979724439043,
'max_depth': 10.0348944523723,
'min_child_samples': 165.21563020282142,
'min_child_weight': 21.332203264120697,
'num_leaves': 61.87277394494194,
'reg_alpha': 2.0197273145317167,
'reg_lambda': 2.9766850887569554,
'subsample': 0.929100400337473}},
{'target': 0.7772670569283046,
'params': {'colsample_bytree': 0.7169009573287946,
'max_bin': 401.29920144608667,
'max_depth': 14.481611276112087,
'min_child_samples': 168.21885542889288,
'min_child_weight': 9.400377234221835,
'num_leaves': 62.01997716429825,
'reg_alpha': 1.0553812402166765,
'reg_lambda': 5.134675927721137,
'subsample': 0.5538168614372474}},
{'target': 0.777961302008304,
'params': {'colsample_bytree': 0.5824262661493007,
'max_bin': 371.1746026096842,
'max_depth': 15.055455354136251,
'min_child_samples': 168.3108071725216,
'min_child_weight': 23.886438476304335,
'num_leaves': 63.790243708508754,
'reg_alpha': 3.6327436106737037,
'reg_lambda': 0.3227467203381007,
'subsample': 0.6383581316575138}},
{'target': 0.7771975861964955,
'params': {'colsample_bytree': 0.8981278289798678,
'max_bin': 375.17904217514155,
'max_depth': 14.294028004875575,
'min_child_samples': 162.998161863493,
'min_child_weight': 46.30425396039739,
'num_leaves': 50.26814189766682,
'reg_alpha': 2.119872012202335,
'reg_lambda': 7.75026606227078,
'subsample': 0.5721235630566639}},
{'target': 0.7767660924971176,
'params': {'colsample_bytree': 0.8034531103857304,
'max_bin': 369.6242799781358,
'max_depth': 15.199032111021513,
'min_child_samples': 190.1676925160255,
'min_child_weight': 31.266432243538546,
'num_leaves': 52.61415162295391,
'reg_alpha': 0.8187448116861759,
'reg_lambda': 9.049503029730001,
'subsample': 0.7846166969136641}},
{'target': 0.7756663645382131,
'params': {'colsample_bytree': 0.7312713109541252,
'max_bin': 368.4964543106341,
'max_depth': 14.434582170850263,
'min_child_samples': 160.44131032493473,
'min_child_weight': 38.384280547403044,
'num_leaves': 63.29294304841721,
'reg_alpha': 21.18724675147456,
'reg_lambda': 2.211595823213631,
'subsample': 0.6823413334921987}},
{'target': 0.777062749280059,
'params': {'colsample_bytree': 0.7668126154328806,
'max_bin': 376.60150379275547,
'max_depth': 10.550433710014056,
'min_child_samples': 159.1173396522813,
'min_child_weight': 12.946995377253387,
'num_leaves': 58.18919500833737,
'reg_alpha': 1.1100895577632468,
'reg_lambda': 6.880549128249925,
'subsample': 0.9496423116711239}},
{'target': 0.7768597060356665,
'params': {'colsample_bytree': 0.9772157533520727,
'max_bin': 346.7436582292931,
'max_depth': 13.876254817263995,
'min_child_samples': 181.44265891413107,
'min_child_weight': 4.80780888246455,
'num_leaves': 62.986026421548885,
'reg_alpha': 11.525816750893272,
'reg_lambda': 3.84025929892199,
'subsample': 0.9112982141954422}},
{'target': 0.7764720217059765,
'params': {'colsample_bytree': 0.9555808114097751,
'max_bin': 395.92778276370973,
'max_depth': 12.358022314339884,
'min_child_samples': 166.36714200138337,
'min_child_weight': 43.54970412573226,
'num_leaves': 57.35810926133701,
'reg_alpha': 2.283967990656195,
'reg_lambda': 3.76832425549122,
'subsample': 0.9566072753814255}},
{'target': 0.7774493312274952,
'params': {'colsample_bytree': 0.6477934739226694,
'max_bin': 367.6675876644748,
'max_depth': 11.730941829672222,
'min_child_samples': 186.24556849168766,
'min_child_weight': 3.6543971928297454,
'num_leaves': 60.45787106398964,
'reg_alpha': 1.4674262765362538,
'reg_lambda': 0.20062291235053142,
'subsample': 0.917246239044115}},
{'target': 0.7768854976504461,
'params': {'colsample_bytree': 0.7181743901580139,
'max_bin': 339.74009241419066,
'max_depth': 13.755247729359372,
'min_child_samples': 167.4652128981975,
'min_child_weight': 32.54665377571506,
'num_leaves': 61.98705999392814,
'reg_alpha': 0.4835884388562424,
'reg_lambda': 3.174121356240033,
'subsample': 0.6533734295040801}},
{'target': 0.7724379618621265,
'params': {'colsample_bytree': 0.5198595964676527,
'max_bin': 10.009965107411501,
'max_depth': 14.567189854879851,
'min_child_samples': 198.83377097677672,
'min_child_weight': 38.83879647130123,
'num_leaves': 63.26339023800588,
'reg_alpha': 18.88483420805432,
'reg_lambda': 3.3178810933279985,
'subsample': 0.623510096683173}},
{'target': 0.7719190876904308,
'params': {'colsample_bytree': 0.8942600105113289,
'max_bin': 71.76236405792643,
'max_depth': 7.739261374710775,
'min_child_samples': 10.955232692724033,
'min_child_weight': 22.241171239900133,
'num_leaves': 25.937394082071023,
'reg_alpha': 46.30009587104107,
'reg_lambda': 4.911624003613104,
'subsample': 0.7233194243539027}},
{'target': 0.7734656076822688,
'params': {'colsample_bytree': 0.8023945725189823,
'max_bin': 498.8176421280064,
'max_depth': 13.147765338582632,
'min_child_samples': 11.368447925498895,
'min_child_weight': 43.16584829876573,
'num_leaves': 59.00751702322004,
'reg_alpha': 48.691441585857994,
'reg_lambda': 6.391621338153364,
'subsample': 0.6108519192680875}},
{'target': 0.7753891042822209,
'params': {'colsample_bytree': 0.8570231928678625,
'max_bin': 182.96367624517637,
'max_depth': 15.883849845960137,
'min_child_samples': 199.35390110332432,
'min_child_weight': 46.088945332777996,
'num_leaves': 25.529505286243747,
'reg_alpha': 0.46761095131558106,
'reg_lambda': 4.877007626377918,
'subsample': 0.736680130706592}},
{'target': 0.7777188541578983,
'params': {'colsample_bytree': 0.5717394348713019,
'max_bin': 375.7377466381185,
'max_depth': 9.3014465175732,
'min_child_samples': 21.453403880742023,
'min_child_weight': 1.0472175191797235,
'num_leaves': 56.95167223639347,
'reg_alpha': 5.258380504748422,
'reg_lambda': 1.8369714379535285,
'subsample': 0.5755759870388671}}]
# Collect the objective ('target') value of every optimisation iteration.
target_list = [trial['target'] for trial in lgbBO.res]
print(target_list)
# Find the index (iteration number) of the largest target value.
print('maximum target index:', np.argmax(np.array(target_list)))
[0.7758329290960616, 0.7756769340648957, 0.776976163031267, 0.7749205409939958, 0.7741227529929269, 0.7773664953021925, 0.7739399036759036, 0.7772482274769529, 0.7759758734462505, 0.7762406739235685, 0.7764952479787675, 0.7771535306175148, 0.7769328889682356, 0.7744011918766316, 0.7774422118078091, 0.7772670569283046, 0.777961302008304, 0.7771975861964955, 0.7767660924971176, 0.7756663645382131, 0.777062749280059, 0.7768597060356665, 0.7764720217059765, 0.7774493312274952, 0.7768854976504461, 0.7724379618621265, 0.7719190876904308, 0.7734656076822688, 0.7753891042822209, 0.7777188541578983]
maximum target index: 16
# Use the index of the best target value to pull that iteration's
# parameter set out of res.
max_dict = lgbBO.res[np.argmax(np.array(target_list))]
print(max_dict)
{'target': 0.777961302008304, 'params': {'colsample_bytree': 0.5824262661493007, 'max_bin': 371.1746026096842, 'max_depth': 15.055455354136251, 'min_child_samples': 168.3108071725216, 'min_child_weight': 23.886438476304335, 'num_leaves': 63.790243708508754, 'reg_alpha': 3.6327436106737037, 'reg_lambda': 0.3227467203381007, 'subsample': 0.6383581316575138}}
def train_apps_all(apps_all_train):
    """Train an LGBMClassifier on the application-level training frame.

    The hyper-parameters are the best set found by the Bayesian
    optimisation run above (iteration 16, target 0.77796), with the
    float-valued search results rounded to usable values.

    Returns the fitted classifier.
    """
    # Features are every column except the row id and the label.
    features = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    label = apps_all_train['TARGET']
    # Hold out 30% of the rows as the early-stopping validation set.
    x_tr, x_val, y_tr, y_val = train_test_split(features, label, test_size=0.3, random_state=2020)
    print('train shape:', x_tr.shape, 'valid shape:', x_val.shape)
    # Tuned hyper-parameters (see max_dict above), rounded for use.
    tuned = dict(
        nthread=4,
        n_estimators=1000,
        learning_rate=0.02,
        max_depth=15,
        num_leaves=64,
        colsample_bytree=0.582,
        subsample=0.638,
        max_bin=371,
        reg_alpha=3.633,
        reg_lambda=0.323,
        min_child_weight=24,
        min_child_samples=168,
        silent=-1,
        verbose=-1,
    )
    clf = LGBMClassifier(**tuned)
    # Stop when validation AUC has not improved for 100 rounds; log every 100.
    clf.fit(x_tr, y_tr, eval_set=[(x_tr, y_tr), (x_val, y_val)], eval_metric='auc', verbose=100,
            early_stopping_rounds=100)
    return clf
# Build the modelling frame: merge previous-application aggregates into the
# application data, encode categoricals, split back into train/test rows,
# then train with the tuned parameters.
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)
prev_agg shape: (338857, 41)
apps_all before merge shape: (356255, 135)
apps_all after merge with prev_agg shape: (356255, 176)
train shape: (215257, 174) valid shape: (92254, 174)
[LightGBM] [Warning] num_threads is set with nthread=4, will be overridden by n_jobs=-1. Current value: num_threads=-1
Training until validation scores don't improve for 100 rounds
[100] training's auc: 0.783906 training's binary_logloss: 0.242191 valid_1's auc: 0.761466 valid_1's binary_logloss: 0.24739
[200] training's auc: 0.804487 training's binary_logloss: 0.23254 valid_1's auc: 0.770483 valid_1's binary_logloss: 0.242633
[300] training's auc: 0.821403 training's binary_logloss: 0.225993 valid_1's auc: 0.774525 valid_1's binary_logloss: 0.241079
[400] training's auc: 0.836004 training's binary_logloss: 0.220459 valid_1's auc: 0.776612 valid_1's binary_logloss: 0.240321
[500] training's auc: 0.848499 training's binary_logloss: 0.215588 valid_1's auc: 0.778139 valid_1's binary_logloss: 0.239818
[600] training's auc: 0.859436 training's binary_logloss: 0.211219 valid_1's auc: 0.77868 valid_1's binary_logloss: 0.23963
[700] training's auc: 0.869455 training's binary_logloss: 0.207164 valid_1's auc: 0.779084 valid_1's binary_logloss: 0.239493
[800] training's auc: 0.878692 training's binary_logloss: 0.203284 valid_1's auc: 0.779326 valid_1's binary_logloss: 0.239413
[900] training's auc: 0.887268 training's binary_logloss: 0.199575 valid_1's auc: 0.779256 valid_1's binary_logloss: 0.239444
Early stopping, best iteration is:
[849] training's auc: 0.883077 training's binary_logloss: 0.201436 valid_1's auc: 0.779364 valid_1's binary_logloss: 0.239404
# Probability of the positive class (column 1) for each test row,
# written out as the Kaggle submission file.
preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_tuning_01.csv', index=False)
차수 | Private Score | Public Score | 설명 |
---|---|---|---|
1차 | 0.74088 | 0.74448 | application 데이터 세트 기본 preprocessing |
2차 | 0.75458 | 0.75882 | application 데이터 세트 Feature Engineering |
3차 | 0.76396 | 0.77579 | previous 데이터 세트 Feature Engineering |
4차 | 0.76420 | 0.77583 | previous 데이터 세트 Feature Engineering 함수화 |
5차 | 0.76710 | 0.77630 | Bayesian Optimization을 이용한 하이퍼 파라미터 튜닝 |
Public Score는 test 셋의 20%, Private Score는 test 셋의 80%로 설정, 그래서 보다 정확하게 튜닝을 하려면 train 셋을 cross validation으로 하는 것이 맞다. 하지만 이것이 점수를 무조건 올릴 수 있다라고 할 순 없다.(오히려 떨어질 수 있음)
강의에서 6시간이 걸린다고 해서 일단은 돌리지 않고 코드만 가져옴.
강의에서 돌렸을 때 Private Score 는 0.76355, Public Score 는 0.77499 가 나와서 오히려 떨어짐.
# Search space for Bayesian optimisation, using LightGBM's native parameter
# names (the equivalent sklearn-wrapper aliases are noted inline).
bayesian_params = {
    'max_depth': (6, 16),
    'num_leaves': (24, 64),
    'min_data_in_leaf': (10, 200),  # min_child_samples
    'min_child_weight': (1, 50),
    'bagging_fraction': (0.5, 1.0),  # subsample
    'feature_fraction': (0.5, 1.0),  # colsample_bytree
    'max_bin': (10, 500),
    'lambda_l2': (0.001, 10),  # reg_lambda
    'lambda_l1': (0.01, 50)  # reg_alpha
}
import lightgbm as lgb
# Wrap the features/label in a native lgb.Dataset for lgb.cv();
# free_raw_data=False keeps the raw arrays available for reuse across folds.
# NOTE(review): ftr_app / target_app are only assigned inside train_apps_all
# above — at top level they must come from an earlier (unseen) cell,
# otherwise this raises NameError. TODO confirm.
train_data = lgb.Dataset(data=ftr_app, label=target_app, free_raw_data=False)
def lgb_roc_eval_cv(max_depth, num_leaves, min_data_in_leaf, min_child_weight, bagging_fraction,
                    feature_fraction, max_bin, lambda_l2, lambda_l1):
    """BayesianOptimization objective: best mean 3-fold CV AUC for one parameter set.

    All arguments arrive as floats from the optimiser, so integer-valued
    hyper-parameters are rounded/cast and fractions clipped to [0, 1].
    Reads the module-level `train_data` lgb.Dataset.
    """
    params = {
        "num_iterations":500, "learning_rate":0.02,
        # NOTE(review): early stopping is specified twice — 100 here and
        # early_stopping_rounds=50 in the lgb.cv() call below. Which value
        # takes effect depends on LightGBM's alias-precedence rules for this
        # version; unify them to one value. TODO confirm intended rounds.
        'early_stopping_rounds':100, 'metric':'auc',
        'max_depth': int(round(max_depth)),  # the optimiser passes floats, so integer-valued params are rounded and cast
        'num_leaves': int(round(num_leaves)),
        'min_data_in_leaf': int(round(min_data_in_leaf)),
        'min_child_weight': int(round(min_child_weight)),
        'bagging_fraction': max(min(bagging_fraction, 1), 0),  # clip sampling fractions into [0, 1]
        'feature_fraction': max(min(feature_fraction, 1), 0),
        'max_bin': max(int(round(max_bin)),10),  # floor at 10, the search-space lower bound
        'lambda_l2': max(lambda_l2,0),  # regularisation terms must be non-negative
        'lambda_l1': max(lambda_l1, 0)
    }
    # Use the native lightgbm cv() API:
    # sklearn's cross_val_score() has no early_stopping_rounds support.
    cv_result = lgb.cv(params, train_data, nfold=3, seed=0, verbose_eval =100, early_stopping_rounds=50, metrics=['auc'])
    # Best (max) mean validation AUC across boosting rounds.
    return max(cv_result['auc-mean'])
# Re-extract and display the best iteration's parameter set for reference.
max_dict = lgbBO.res[np.argmax(np.array(target_list))]
print(max_dict)
def train_apps_all(apps_all_train):
    """Train the final LGBMClassifier on the application-level training set.

    Redefines the earlier train_apps_all with a different hyper-parameter
    set — presumably the CV-based tuning result quoted from the lecture
    (TODO confirm the provenance of these values).

    Returns the fitted classifier.
    """
    # Features are every column except the row id and the label.
    ftr_app = apps_all_train.drop(['SK_ID_CURR', 'TARGET'], axis=1)
    target_app = apps_all_train['TARGET']
    # Hold out 30% of rows as the early-stopping validation set.
    train_x, valid_x, train_y, valid_y = train_test_split(ftr_app, target_app, test_size=0.3, random_state=2020)
    print('train shape:', train_x.shape, 'valid shape:', valid_x.shape)
    clf = LGBMClassifier(
        nthread=4,
        n_estimators=1000,
        learning_rate=0.02,
        max_depth = 10,
        num_leaves=60,
        colsample_bytree=0.511,
        subsample=0.785,
        max_bin=208,
        reg_alpha=7.009,
        reg_lambda=6.579,
        min_child_weight=40,
        min_child_samples=91,
        silent=-1,
        verbose=-1,
    )
    # Stop when validation AUC has not improved for 100 rounds; log every 100.
    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 100,
            early_stopping_rounds= 100)
    return clf
# Rebuild the feature set, retrain with the parameters defined just above,
# and write the second submission file.
apps_all = get_apps_all_with_prev_agg(apps, prev)
apps_all = get_apps_all_encoded(apps_all)
apps_all_train, apps_all_test = get_apps_all_train_test(apps_all)
clf = train_apps_all(apps_all_train)
# Positive-class probability (column 1) per test row, as the submission TARGET.
preds = clf.predict_proba(apps_all_test.drop('SK_ID_CURR', axis=1))[:, 1 ]
apps_all_test['TARGET'] = preds
apps_all_test[['SK_ID_CURR', 'TARGET']].to_csv('prev_baseline_tuning_02.csv', index=False)
캐글 Advanced 머신러닝 실전 박치기 / 인프런