!pip install optuna
!pip install catboost
Library overview
- Optuna: hyperparameter optimization framework
- CatBoostRegressor: the gradient-boosting model being tuned
- train/validation split
- Evaluation score: swap this out depending on which evaluation metric you want to optimize.
# Optuna libraries
import optuna
from optuna import Trial
from optuna.samplers import TPESampler

# Model libraries (CatBoostRegressor is the one actually used below)
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor

# train/validation split
from sklearn.model_selection import train_test_split

# Evaluation score
from sklearn.metrics import mean_squared_error
# Hold out 20% of the data for validation; fixed random_state for reproducibility.
# NOTE(review): X and y are assumed to be defined in an earlier cell — confirm.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
This Optuna study tunes hyperparameters against MSE.
n_trials controls how many Optuna trials are run.
CatBoost is slow to train, so when running in a Colab environment
about 50 trials or fewer seems reasonable.
# TPE (Tree-structured Parzen Estimator) sampler; seeded so the search is reproducible.
sampler = TPESampler(seed=10)
# define function
def objective(trial):
    """Optuna objective: train a CatBoostRegressor on the train split and
    return the validation MSE (the study minimizes this value).

    Args:
        trial: optuna.Trial used to sample one hyperparameter configuration.

    Returns:
        float: mean squared error on (X_val, y_val).
    """
    # suggest_uniform / suggest_loguniform were deprecated in Optuna 3.0 and
    # later removed — suggest_float (with log=True for log scale) replaces both.
    cbrm_param = {
        'iterations': trial.suggest_int('iterations', 4000, 25000),
        'od_wait': trial.suggest_int('od_wait', 500, 2300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 100),
        # Lower bound raised from 0 to 0.1: CatBoost rejects subsample == 0.
        'subsample': trial.suggest_float('subsample', 0.1, 1),
        'random_strength': trial.suggest_float('random_strength', 10, 50),
        'depth': trial.suggest_int('depth', 1, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 30),
        'leaf_estimation_iterations': trial.suggest_int('leaf_estimation_iterations', 1, 15),
        # NOTE(review): bagging_temperature (Bayesian bootstrap) and subsample
        # (Bernoulli bootstrap) are mutually exclusive in CatBoost — confirm
        # which bootstrap_type is intended; CatBoost may ignore or reject one.
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.01, 100.00, log=True),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.4, 1.0),
    }

    # Train with early stopping on the validation set; verbose=0 keeps the
    # Optuna log readable across many trials.
    model_cbrm = CatBoostRegressor(**cbrm_param)
    model_cbrm.fit(X_train, y_train, eval_set=[(X_val, y_val)],
                   verbose=0, early_stopping_rounds=25)

    # Swap in a different metric here if the competition/score changes.
    return mean_squared_error(y_val, model_cbrm.predict(X_val))
# Run the study: minimize the validation MSE returned by `objective`,
# drawing candidate configurations from the seeded TPE sampler.
optuna_cbrm = optuna.create_study(direction='minimize', sampler=sampler)
optuna_cbrm.optimize(objective, n_trials=50)

# Pull out the best trial and its hyperparameter dict for reuse below.
cbrm_trial = optuna_cbrm.best_trial
cbrm_trial_params = cbrm_trial.params
print(f'Best Trial: score {cbrm_trial.value},\nparams {cbrm_trial_params}')
Best Trial: score 30.2018855074767,
params {'iterations': 7576, 'od_wait': 1275, 'learning_rate': 0.6572492909312421, 'reg_lambda': 89.78926591011864, 'subsample': 0.4477959779988517, 'random_strength': 31.294050321894346, 'depth': 5, 'min_data_in_leaf': 21, 'leaf_estimation_iterations': 10, 'bagging_temperature': 78.05871620966963, 'colsample_bylevel': 0.8827408396007562}
cbrm_trial_params
{'bagging_temperature': 78.05871620966963,
'colsample_bylevel': 0.8827408396007562,
'depth': 5,
'iterations': 7576,
'leaf_estimation_iterations': 10,
'learning_rate': 0.6572492909312421,
'min_data_in_leaf': 21,
'od_wait': 1275,
'random_strength': 31.294050321894346,
'reg_lambda': 89.78926591011864,
'subsample': 0.4477959779988517}
# Modeling fit
# Refit on the training split using the best hyperparameters found by the study.
cbrm = CatBoostRegressor(**cbrm_trial_params)
# CatBoost's fit() returns the fitted model itself, so cbrm_study is cbrm.
cbrm_study = cbrm.fit(X_train, y_train)
# Predict the y_test
# NOTE(review): `submission` and `X_test` are assumed to come from an earlier cell — confirm.
submission['y_test'] = cbrm_study.predict(X_test)