sklearn.metrics는 손실, 점수, 회귀 성능 평가 함수 등 다양한 함수를 가지고 있다. 예를 들면 mean_squared_error, explained_variance_score, r2_score 같은 것이 존재한다.
from sklearn.metrics import explained_variance_score

# Explained variance: 1.0 means the predictions capture all variance in
# y_true; this toy example scores slightly below 1.
y_true, y_pred = [3, -0.5, 2, 7], [2.5, 0.0, 2, 8]
explained_variance_score(y_true, y_pred)
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.utils import shuffle  # kept: imported by the original script

# Student performance dataset: predict the final grade G3 from the two
# earlier grades plus study-related features.
data = pd.read_csv(
    "https://raw.githubusercontent.com/amankharwal/Website-data/master/student-mat.csv"
)
data = data[["G1", "G2", "G3", "studytime", "failures", "absences"]]

predict = "G3"
# BUG FIX: the original used drop([predict], 1) — the positional `axis`
# argument was deprecated in pandas 1.x and removed in pandas 2.0.
x = np.array(data.drop(columns=[predict]))
y = np.array(data[predict])

# 80/20 train/test split (no random_state, so the split differs per run).
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)

linear_regression = LinearRegression()
linear_regression.fit(xtrain, ytrain)
predictions = linear_regression.predict(xtest)

# Mean explained variance over a 10-fold cross-validation.
print(cross_val_score(linear_regression, x, y, cv=10, scoring="explained_variance").mean())
다음은 max_error 함수를 사용하는 예시이다.
from sklearn.metrics import max_error

# max_error: the single worst residual — here |3 - 9| = 6 dominates.
y_true, y_pred = [3, 2, 7, 1], [9, 2, 7, 1]
max_error(y_true, y_pred)
출력
6
from sklearn.metrics import mean_absolute_error

# MAE: average of the absolute residuals |y_true_i - y_pred_i|.
y_true, y_pred = [3, -0.5, 2, 7], [2.5, 0.0, 2, 8]
mean_absolute_error(y_true, y_pred)
import numpy as np


def mae(y_true, y_pred):
    """Return the mean absolute error between two same-length sequences.

    BUG FIX: the original computed `y_true - y_pred` directly, which raises
    TypeError for plain Python lists; np.asarray makes any array-like work.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # axis=0 preserved from the original: column-wise MAE for 2-D input.
    return np.mean(np.abs(y_true - y_pred), axis=0)
from sklearn.metrics import mean_squared_error

# MSE: average of the squared residuals.
y_true, y_pred = [3, -0.5, 2, 7], [2.5, 0.0, 2, 8]
mean_squared_error(y_true, y_pred)
import numpy as np


def mse(y_true, y_pred):
    """Return the mean squared error between two same-length sequences."""
    # np.subtract converts list inputs to arrays before differencing.
    squared_diff = np.subtract(y_true, y_pred) ** 2
    return squared_diff.mean()
from sklearn.metrics import mean_squared_error

# BUG FIX: the original passed undefined names y_actual / y_predicted,
# which raises NameError; use the same sample data as the other examples.
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
# squared=False returns the root of the MSE (i.e. the RMSE).
# NOTE(review): `squared` is deprecated since scikit-learn 1.4 and removed
# in 1.6 in favour of root_mean_squared_error — confirm installed version.
mean_squared_error(y_true, y_pred, squared=False)
import numpy as np


def rmse(y_true, y_pred):
    """Return the root mean squared error between two same-length sequences.

    BUG FIX: the original computed `y_true - y_pred` directly, which raises
    TypeError for plain Python lists; np.asarray makes any array-like work.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))
from sklearn.metrics import mean_squared_log_error

# MSLE: mean of (log(1 + y_true) - log(1 + y_pred))^2 — penalises
# under-prediction more than over-prediction; inputs must be non-negative.
y_true, y_pred = [3, 5, 2.5, 7], [2.5, 5, 4, 8]
mean_squared_log_error(y_true, y_pred)
def rmsle(y_true, y_pred):
    """Return the root mean squared logarithmic error.

    Pairs where either value is negative are skipped (the log would be
    undefined), but the denominator stays len(y_pred) to match the
    original's behavior.
    """
    total = 0.0  # FIX: renamed from `sum`, which shadowed the builtin
    for i in range(len(y_pred)):
        if y_pred[i] < 0 or y_true[i] < 0:
            continue  # log(x + 1) undefined for x < -1; skip negatives
        # log1p(v) == log(v + 1) but is more accurate for small v
        diff = np.log1p(y_pred[i]) - np.log1p(y_true[i])
        total += diff ** 2
    return (total / len(y_pred)) ** 0.5
from sklearn.metrics import r2_score

# R²: 1.0 is a perfect fit; 0.0 means no better than predicting the mean.
y_true, y_pred = [3, -0.5, 2, 7], [2.5, 0.0, 2, 8]
r2_score(y_true, y_pred)
def r2_score(x_values, y_values):
    """Return the squared Pearson correlation between the two sequences.

    NOTE(review): this shadows sklearn.metrics.r2_score imported above, and
    squared correlation equals R² only for a simple linear fit — verify
    that this redefinition is intentional.
    """
    r_xy = np.corrcoef(x_values, y_values)[0, 1]
    return r_xy ** 2
import numpy as np


# 다항함수를 사용하는 경우 (when using a polynomial fit)
def poly(x, y, degree):
    """Fit a polynomial of the given degree to (x, y) and report fit quality.

    Returns a dict with:
      'polynomial'    — fitted coefficients, highest power first
      'determination' — R² of the fit, SSreg / SStot

    BUG FIX: the original called `np.numpy.sum`, which raises
    AttributeError; list `y` would also fail at `y - ybar`.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)  # asarray so list inputs subtract cleanly
    results = {}
    # Polynomial coefficients (highest power first)
    coeffs = np.polyfit(x, y, degree)
    results['polynomial'] = coeffs.tolist()
    # r-squared from the fitted model's predictions
    p = np.poly1d(coeffs)
    yhat = p(x)
    ybar = y.mean()
    ssreg = np.sum((yhat - ybar) ** 2)
    sstot = np.sum((y - ybar) ** 2)  # was np.numpy.sum — AttributeError
    results['determination'] = ssreg / sstot
    return results
# BUG FIX: scipy was never imported, so this line raised NameError.
# NOTE(review): linregress requires 1-D x and y; the `x` built earlier in
# this file is 2-D — pass a single feature column when running this.
import scipy.stats

slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x, y)
최종적으로 정리를 하다보니 앞서 작성한 분류 모델 평가와 회귀모델 평가 지표를 더 자세히 알 수 있었던 것 같다.