모듈 불러오기
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
seaborn에서 제공하는 데이터셋 확인
# List the names of the example datasets that seaborn can download.
sns.get_dataset_names()
# Load the dataset into `a`, the name the exploration code below reads from.
# The original line bound the result to `A` and applied a stray unary minus,
# which would negate the DataFrame instead of just loading it.
# NOTE(review): 'titanic' is inferred from the 'age' / 'sex' / 'class' columns
# used by the plots further down (the original literal 'data' is not a seaborn
# example dataset) — confirm this is the intended dataset.
a = sns.load_dataset('titanic')
컬럼 값 분포 확인 : value_counts() / 차트 그리기
# Frequency table of one column, then the same counts as a bar chart.
# NOTE(review): 'data' looks like a placeholder column name — replace it with
# a real column of `a` before running.
a['data'].value_counts()
a['data'].value_counts().plot(kind='bar')
# Box plot of a single column.
a['data'].plot(kind='box')
# Box plots of age, one box per sex category.
sns.boxplot(data=a, x='age', y='sex')
# Age by passenger class, split by a second category via `hue`.
# `data` must be the DataFrame itself; the original passed the string 'a'.
# NOTE(review): hue='s' looks like a placeholder column name — confirm.
sns.boxplot(data=a, x='class', y='age', hue='s')
원핫 인코딩
분포 확인 : df[' '].value_counts()
# One-hot encoding template: show the first rows of the encoded result.
# NOTE(review): the blank ' ' strings appear to be placeholders for the
# DataFrame and for the categorical column name(s) to encode; the result is
# not assigned to anything here — fill in and assign before relying on it.
pd.get_dummies(data=' ',columns=[' ']).head()
X, y 나누기 / Train, Test 데이터셋 나누기
# Separate the feature matrix X from the target column y. Both must come from
# the same DataFrame so that their rows line up (the original took X from
# `test` and y from `titanic`).
# NOTE(review): 's' looks like a placeholder for the target column name, and
# `titanic` must already hold the prepared DataFrame — confirm both.
X = titanic.drop('s', axis=1)
y = titanic['s']

from sklearn.model_selection import train_test_split

# Plain 80/20 hold-out split, followed by a quick look at the resulting shapes.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

# Stratified 70/30 split. This rebinds y_train / y_test, so from here on the
# labels belong to this split rather than to the 80/20 one above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Point the lower-case feature names at the same split as y_train / y_test.
# Without this, x_train / x_test would keep the 80/20 rows while the labels
# hold the 70/30 rows, and any fit(x_train, y_train) would see mismatched
# sample counts.
x_train, x_test = X_train, X_test
라이브러리 불러오기
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Train a decision tree with default hyper-parameters and score it on the
# held-out rows.
# X_train / X_test are used (not x_train / x_test) because y_train / y_test
# were last assigned by the stratified split that produced the upper-case
# feature frames; the lower-case frames come from a different split and have
# a different number of rows.
result = DecisionTreeClassifier()
result.fit(X_train, y_train)
result.score(X_test, y_test)
Random Forest
from sklearn.ensemble import RandomForestClassifier

# Train a random forest with default hyper-parameters and score it on the
# held-out rows.
# X_train / X_test are used (not x_train / x_test) because y_train / y_test
# were last assigned by the stratified split that produced the upper-case
# feature frames; the lower-case frames come from a different split.
result = RandomForestClassifier()
result.fit(X_train, y_train)
result.score(X_test, y_test)

# Predict the class of the first held-out row.
# `.iloc[[0]]` selects that row by position and keeps it as a one-row
# DataFrame, which is the 2-D input predict() expects. The original
# `x_test[0]` would instead look up a column labelled 0 on a DataFrame.
result_pred = result.predict(X_test.iloc[[0]])
result_pred
딥러닝 모델링
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Baseline network: one 10-unit ReLU hidden layer feeding a 2-unit softmax
# output. input_shape=(6,) means each sample must have exactly 6 features.
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(6,)))
model.add(Dense(2, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# X_train is used (not x_train) because y_train was last assigned by the
# stratified split that produced X_train; x_train comes from a different
# split and has a different number of rows.
history = model.fit(X_train, y_train, epochs=10, batch_size=8)
# A second fit() call continues from the current weights (it does not reset
# the model) and replaces `history` with the log of these 30 epochs only.
# The original spelled the method `fir`, which raises AttributeError.
history = model.fit(X_train, y_train, epochs=30, batch_size=8)
# Deeper variant: 32 -> 16 ReLU hidden units feeding a 2-unit softmax output,
# declared as a layer list instead of successive add() calls.
model = Sequential([
    Dense(32, activation='relu', input_shape=(6,)),
    Dense(16, activation='relu'),
    Dense(2, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 30 epochs in mini-batches of 8 and keep the per-epoch log.
history = model.fit(X_train, y_train, batch_size=8, epochs=30)
- 학습이 잘 되는지 valid data로 평가를 해보자
# Same 32 -> 16 -> 2 network, rebuilt from scratch so training starts from
# fresh weights, this time reporting validation metrics after every epoch.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(6,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# X_train / X_test are used (not x_train / x_test) because y_train / y_test
# were last assigned by the stratified split that produced the upper-case
# feature frames; pairing them with the lower-case frames would mix rows from
# two different splits.
# The held-out test rows double as the validation set here.
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=8,
    validation_data=(X_test, y_test),
)