import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix and the target vector used by the steps below.
breast = load_breast_cancer()
feature, label = breast.data, breast.target
# 3. Split the data into training and test sets.
# stratify: keeps the distribution of the label's unique values (classes) the same across the resulting splits.
# Hold out 25% of the samples for testing. Passing the labels to `stratify`
# asks for the class proportions to be preserved in both splits, and the
# fixed `random_state` keeps the split the same from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    feature,
    label,
    test_size=0.25,
    stratify=label,
    random_state=1,
)
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

# Fit a 5-nearest-neighbours classifier; `fit` returns the estimator itself,
# so the fitted model can be bound in a single statement.
model = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Predict on the held-out samples and show the first ten predictions next to
# the first ten true labels for a quick visual comparison.
y_pred = model.predict(x_test)
print('예측값 : ', y_pred[:10])
print('실제값 : ', y_test[:10])

# Fraction of held-out samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print('모델 정확도 : ', acc)