학습 순서 및 방법
1.1 독립변수(X)와 타깃(y) 분리
# Separate the target column from the remaining feature columns.
# NOTE(review): `df` is presumably a pandas DataFrame loaded earlier — confirm.
y = df['타깃데이터']
X = df.drop(columns='타깃데이터')
1.2 학습/검증 데이터 분리
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation. A fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
# The call is wrapped inside its own parentheses: a line ending in a bare
# `=` is a syntax error.
X_tr, X_val, y_tr, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
2. 모델 학습
from sklearn.linear_model import LinearRegression

# Baseline model: ordinary least squares fitted on the training split.
# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_tr, y_tr)
3. 모델 검증 및 성능 평가
# Imports live in this block because the metrics and numpy are used here
# before any other point in the file brings them into scope.
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the held-out validation split.
y_pred = model.predict(X_val)
print(y_pred)

# Coefficient of determination of the predictions against the true targets.
r2 = r2_score(y_val, y_pred)
print(r2)

# Root mean squared error: square root of the MSE, in the target's units.
rmse = np.sqrt(mean_squared_error(y_val, y_pred))
print(rmse)

# Mean absolute error.
mae = mean_absolute_error(y_val, y_pred)
print(mae)
4. 하이퍼파라미터 최적화(그리드서치)
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# Candidate regularization strengths, spaced by powers of ten.
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# NOTE: this rebinds `model`, which previously held the LinearRegression.
model = Ridge()

# Evaluate every alpha in the grid with 5-fold cross-validation on the
# training split.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_tr, y_tr)

# Keep both the winning parameter set and the estimator built from it.
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
print(best_model)
print(best_params)
5. 최적값으로 다시 모델 훈련
from sklearn.linear_model import Ridge

# Take alpha from the grid-search result instead of a hard-coded copy of one
# run's answer, so this step stays correct when the data or grid changes.
ridge_model = Ridge(alpha=grid_search.best_params_['alpha'])
ridge_model.fit(X_tr, y_tr)
6. 다시 성능평가
# r2_score and numpy are needed here but are not imported anywhere earlier
# in the file, so this block brings them into scope itself.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the validation split with the re-trained Ridge model.
y_pred = ridge_model.predict(X_val)

# Coefficient of determination of the predictions against the true targets.
r2 = r2_score(y_val, y_pred)
print(r2)

# Mean squared error, and its square root (RMSE) in the target's units.
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
print(rmse)
print(mse)
7. 시각화
# pyplot must be imported before its first use in this block.
import matplotlib.pyplot as plt

# Scatter of true validation targets (x) against model predictions (y).
plt.scatter(y_val, y_pred)
plt.show()
import matplotlib.pyplot as plt
import numpy as np

ridge_coefficients = ridge_model.coef_
# The model was fitted on X_tr, so its columns are the ones that line up
# with coef_. (No variable named X_train is defined by the split.)
# NOTE(review): assumes a single target so that coef_ is 1-D — confirm.
feature_names = X_tr.columns

# Pair each coefficient with its feature name and order the pairs by
# absolute magnitude, largest first.
sorted_coefficients = sorted(
    zip(ridge_coefficients, feature_names),
    key=lambda pair: abs(pair[0]),
    reverse=True,
)
coefficients, names = zip(*sorted_coefficients)

plt.figure(figsize=(10, 6))
plt.barh(range(len(names)), coefficients, align='center')
plt.yticks(range(len(names)), names)
# Flip the y axis so the first (largest-magnitude) entry is drawn at the top.
plt.gca().invert_yaxis()
# Render the figure; without this nothing appears when run as a script.
plt.show()