# --- Logistic Regression ---
# Base model
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

# Grid Search: regularization strength C on a log scale
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search
from scipy.stats import randint  # kept in scope: later snippets reuse randint
# FIX: C is a continuous hyperparameter; randint(low=0.001, high=100) draws
# only integers >= 1, so the sub-1 values covered by the grid above could
# never be sampled. loguniform samples the same log-scaled range continuously.
from scipy.stats import loguniform
param_distribs = {'C': loguniform(0.001, 100)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(LogisticRegression(),
                                   param_distributions=param_distribs,
                                   n_iter=100, cv=5)
random_search.fit(X_scaled_train, y_train)
# --- K-Nearest Neighbors ---
# Base model (classification)
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()

# Base model (regression)
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor()

# Grid Search: odd neighbor counts from 1 to 11 (classifier)
from sklearn.model_selection import GridSearchCV
param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11]}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search: integer neighbor counts in [1, 20) (regressor)
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {'n_neighbors': randint(low=1, high=20)}
random_search = RandomizedSearchCV(KNeighborsRegressor(),
                                   param_distributions=param_distribs,
                                   n_iter=20, cv=5)
random_search.fit(X_scaled_train, y_train)
- 가우시안 분포 = 가우스 분포 , 즉 정규분포상에서 발생확률을 계산하기 때문에 붙여진 이름
- 특성치 중 연속형 자료일 경우 발생확률을 정규분포상에서의 확률 (likelihood : 우도)를 구해 계산
하이퍼 파라미터
var_smoothing
: (default = 1e-9, 안정적인 연산을 위해 분산에 더해지는 모든 특성치의 최대 분산 비율)
하이퍼 파라미터
alpha_1
: default = 1e-6, 감마분포의 alpha 파라미터 사전 설정
lambda_1
: default = 1e-6, 감마분포의 lambda 파라미터 사전 설정
# --- Gaussian Naive Bayes (classification) / Bayesian Ridge (regression) ---
# Base model (classification)
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()

# Base model (regression)
from sklearn.linear_model import BayesianRidge
model = BayesianRidge()
model.fit(X_scaled_train, y_train)

# Grid Search (classification)
# NOTE(review): var_smoothing is usually searched on a log scale near its
# 1e-9 default; the integer grid here follows the original notes — confirm intent.
param_grid = {'var_smoothing': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(GaussianNB(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (classification)
from scipy.stats import randint
param_distribs = {'var_smoothing': randint(low=0, high=20)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(GaussianNB(),
                                   param_distributions=param_distribs,
                                   n_iter=100, cv=5)
random_search.fit(X_scaled_train, y_train)

# Grid Search (regression): gamma-prior parameters on a log scale
param_grid = {'alpha_1': [1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1, 2, 3, 4],
              'lambda_1': [1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1, 2, 3, 4]}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(BayesianRidge(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (regression)
# FIX: alpha_1/lambda_1 are continuous; randint(low=1e-06, high=10) draws only
# the integers 1..9, so the sub-1 values covered by the grid above were never
# sampled. loguniform samples the same log-scaled range continuously.
from scipy.stats import loguniform
param_distribs = {'alpha_1': loguniform(1e-06, 10),
                  'lambda_1': loguniform(1e-06, 10)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(BayesianRidge(),
                                   param_distributions=param_distribs,
                                   n_iter=50, cv=5)
random_search.fit(X_scaled_train, y_train)
분류
from sklearn.naive_bayes import GaussianNB
회귀
from sklearn.linear_model import BayesianRidge
# --- Multi-Layer Perceptron ---
# Base model (classification)
from sklearn.neural_network import MLPClassifier
model = MLPClassifier()

# Base model (regression)
from sklearn.neural_network import MLPRegressor
model = MLPRegressor()

# Grid Search (classification): layer width, solver, and activation
from sklearn.model_selection import GridSearchCV
param_grid = {'hidden_layer_sizes': [10, 30, 50, 100],
              'solver': ['sgd', 'adam'],
              'activation': ['tanh', 'relu']}
grid_search = GridSearchCV(MLPClassifier(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (classification): integer layer widths in [10, 100)
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {'hidden_layer_sizes': randint(low=10, high=100),
                  'solver': ['sgd', 'adam'],
                  'activation': ['tanh', 'relu']}
random_search = RandomizedSearchCV(MLPClassifier(),
                                   param_distributions=param_distribs,
                                   n_iter=10, cv=5)
random_search.fit(X_scaled_train, y_train)
분류 : SVC
- SVM Classifier
회귀 : SVR
- SVM Regressor
# --- Support Vector Machine ---
# Base model (classification)
from sklearn.svm import SVC
model = SVC()

# Base model (regression)
# FIX: 'kenel' was a typo — the SVR keyword argument is 'kernel'.
from sklearn.svm import SVR
model = SVR(kernel='poly')

# Grid Search (classification): rbf and linear kernels over log-scaled C/gamma
param_grid = [
    {'kernel': ['rbf'],
     'C': [0.001, 0.01, 0.1, 1, 10, 100],
     'gamma': [0.001, 0.01, 0.1, 1, 10, 100]},
    {'kernel': ['linear'],
     'C': [0.001, 0.01, 0.1, 1, 10, 100],
     'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
]
from sklearn.model_selection import GridSearchCV  # FIX: 'imort' typo
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (classification)
from scipy.stats import randint  # kept in scope for following snippets
# FIX: C and gamma are continuous; randint(low=0.001, high=100) draws only
# integers >= 1, so the sub-1 values from the grid above were never sampled.
# loguniform samples the same log-scaled range continuously.
from scipy.stats import loguniform
param_distribs = {'kernel': ['rbf'],
                  'C': loguniform(0.001, 100),
                  'gamma': loguniform(0.001, 100)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(SVC(),
                                   param_distributions=param_distribs,
                                   n_iter=100, cv=5)
random_search.fit(X_scaled_train, y_train)

# Grid Search (regression)
param_grid = {'kernel': ['poly'],
              'C': [0.01, 0.1, 1, 10],
              'gamma': [0.01, 0.1, 1, 10]}
from sklearn.model_selection import GridSearchCV
# FIX: original line had unbalanced parentheses ("SVR(kernel = 'poly',
# param_grid, cv = 5_") — the estimator and the GridSearchCV call must each
# be closed.
grid_search = GridSearchCV(SVR(kernel='poly'), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (regression)
param_distribs = {'kernel': ['poly'],
                  'C': loguniform(0.01, 10),
                  'gamma': loguniform(0.01, 10)}
# FIX: the estimator construction was fused into the import statement
# (SyntaxError); import first, then build the search object.
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(SVR(kernel='poly'),
                                   param_distributions=param_distribs,
                                   n_iter=20, cv=5)
random_search.fit(X_scaled_train, y_train)
# --- Decision Tree ---
# Base model (classification)
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()

# Base model (regression)
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()

# Grid Search: even depths 2..18, odd leaf sizes 1..49 (classifier)
from sklearn.model_selection import GridSearchCV
param_grid = {'max_depth': range(2, 20, 2),
              'min_samples_leaf': range(1, 50, 2)}
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search: integer depth in [1, 20), leaf size in [1, 50)
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {'max_depth': randint(low=1, high=20),
                  'min_samples_leaf': randint(low=1, high=50)}
random_search = RandomizedSearchCV(DecisionTreeClassifier(),
                                   param_distributions=param_distribs,
                                   n_iter=20, cv=5)
random_search.fit(X_scaled_train, y_train)
# --- Random Forest ---
# Base model (classification)
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()

# Base model (regression)
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()

# Grid Search (classification)
# FIX: max_features='auto' was removed in scikit-learn 1.3 and now raises.
# For classifiers it was an alias of 'sqrt', so dropping it only removes a
# duplicate candidate.
param_grid = {'n_estimators': range(100, 1000, 100),
              'max_features': ['sqrt', 'log2']}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (classification)
from scipy.stats import randint
param_distribs = {'n_estimators': randint(low=100, high=1000),
                  'max_features': ['sqrt', 'log2']}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(RandomForestClassifier(),
                                   param_distributions=param_distribs,
                                   n_iter=20, cv=5)
random_search.fit(X_scaled_train, y_train)

# Grid Search (regression)
# FIX: for regressors 'auto' meant "all features"; None is the current
# spelling of that option.
param_grid = {'n_estimators': range(100, 500, 100),
              'max_features': [None, 'sqrt', 'log2']}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

# Random Search (regression)
param_distribs = {'n_estimators': randint(low=100, high=500),
                  'max_features': [None, 'sqrt', 'log2']}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(RandomForestRegressor(),
                                   param_distributions=param_distribs,
                                   n_iter=20, cv=5)
random_search.fit(X_scaled_train, y_train)