Pytorch_basic

InSung-Na · May 5, 2023

Part 10. Deep Learning

This post was written with reference to the Zerobase Data School course materials.

1. Pytorch_basic

import torch
x = torch.tensor(3.5)

Gradient Computation

x = torch.tensor(3.5, requires_grad=True)

print(x)
y = (x-1) * (x-2) * (x-3)

print(y)
y.backward()    # compute the gradient by backpropagation
x.grad          # gradient of y with respect to x
---------------------------------------------------
print(x) : tensor(3.5000, requires_grad=True)
print(y) : tensor(1.8750, grad_fn=<MulBackward0>)
x.grad : tensor(5.7500)
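
Checking by hand: y = (x-1)(x-2)(x-3) = x^3 - 6x^2 + 11x - 6, so dy/dx = 3x^2 - 12x + 11; at x = 3.5 this is 36.75 - 42 + 11 = 5.75, matching x.grad.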

Chain Rule

a = torch.tensor(2., requires_grad=True)
b = torch.tensor(1., requires_grad=True)

x = 2*a + 3*b
y = 5*a*a + 3*b**3
z = 2*x + 3*y
z.backward()    # run backpropagation
a.grad          # gradient of z with respect to a
-----------------------------------
tensor(64.)
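
Checking by hand with the chain rule: dz/da = 2*(dx/da) + 3*(dy/da) = 2*2 + 3*(10a) = 4 + 30a, which at a = 2 gives 64, matching a.grad.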

2. Boston Housing Price Prediction

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

Data Load

from sklearn.datasets import fetch_openml
import pandas as pd

# load_boston was removed from scikit-learn, so fetch the dataset from OpenML
X, y = fetch_openml('boston', return_X_y=True, parser='auto', version=1)
df = X
df['TARGET'] = y
df.tail()

Column Selection

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
cols = ["TARGET", "INDUS", "RM", "LSTAT", "NOX", "DIS"]
data = torch.tensor(df[cols].values).float()
data.shape
-----------------------------------------------------------
torch.Size([506, 6])

Split into Features and Labels

x = data[:, 1:]
y = data[:, :1]

print(x.shape, y.shape)
--------------------------------
torch.Size([506, 5]) torch.Size([506, 1])

Hyperparameters

n_epochs = 2000
learning_rate = 1e-3
print_interval = 100

Model Training
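The model is a single nn.Linear layer, i.e. plain linear regression y_hat = Wx + b, trained below by minimizing the MSE loss with SGD.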

model = nn.Linear(x.size(-1), y.size(-1))

# SGD (stochastic gradient descent)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat, y)
    
    optimizer.zero_grad()   # reset accumulated gradients
    loss.backward()         # backpropagation
    
    optimizer.step()        # update parameters
    
    if (i+1)%print_interval==0:
        print("Epoch %d: loss=%.4e" %(i+1, loss))
-----------------------------------------------------
Epoch 100: loss=4.4202e+01
Epoch 200: loss=3.7470e+01
...
Epoch 1900: loss=2.8987e+01
Epoch 2000: loss=2.8986e+01
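
For reference, optimizer.step() with plain SGD performs the update w <- w - lr * grad(w); conceptually it is equivalent to the following manual sketch (not part of the original code):

# what optimizer.step() does for vanilla SGD (no momentum), conceptually
with torch.no_grad():
    for p in model.parameters():
        p -= learning_rate * p.grad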

Training Results

df = pd.DataFrame(torch.cat([y, y_hat], dim=1).detach().numpy(), columns=["y", "y_hat"])
sns.pairplot(df, height=5)
plt.show()

3. Breast Cancer Prediction

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

# print(cancer.DESCR)

Data Preparation

df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
df['class'] = cancer.target
df.tail()

Column Selection

cols = ['mean radius', 'mean texture',
        'mean smoothness', 'mean compactness', 'mean concave points',
        'worst radius', 'worst texture',
        'worst smoothness', 'worst compactness', 'worst concave points',
        'class']

for c in cols[:-1]:
    sns.histplot(df, x=c, hue=cols[-1], bins=50, stat="probability")
    plt.show()  # most of the plots are omitted here

Data Split

data = torch.from_numpy(df[cols].values).float()

x = data[:, :-1]
y = data[:, -1:]

print(x.shape, y.shape)
-----------------------------------------------------
torch.Size([569, 10]) torch.Size([569, 1])

Hyperparameters

n_epochs = 200000
learning_rate = 1e-2
print_interval = 10000

Modeling
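A single linear layer followed by a sigmoid activation is exactly logistic regression; trained with BCELoss below, this is the standard logistic-regression setup.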

class MyModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MyModel, self).__init__()
        self.input_dim, self.output_dim = input_dim, output_dim
        self.linear = nn.Linear(input_dim, output_dim)
        self.act = nn.Sigmoid()
        
    def forward(self, x):
        y = self.act(self.linear(x))
        
        return y
model = MyModel(input_dim=x.size(-1),
                output_dim=y.size(-1))
crit = nn.BCELoss() # Binary Cross Entropy

optimizer = optim.SGD(model.parameters(), lr=learning_rate)
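
nn.BCELoss computes binary cross entropy, loss = -[y*log(y_hat) + (1-y)*log(1-y_hat)], averaged over the batch by default. A minimal sketch checking this against the formula (the numbers are made up for illustration):

import torch
import torch.nn as nn

p = torch.tensor([0.9, 0.2, 0.7])   # hypothetical predicted probabilities
t = torch.tensor([1., 0., 1.])      # hypothetical labels
manual = -(t * p.log() + (1 - t) * (1 - p).log()).mean()
print(manual.item(), nn.BCELoss()(p, t).item())   # identical values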

Model Training

for i in range(n_epochs):
    y_hat = model(x)
    loss = crit(y_hat, y)
    
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    if (i+1)%print_interval==0:
        print(f"Epoch {i+1}: loss={loss.item():.4f}")
---------------------------------------------------------
Epoch 10000: loss=0.2796
Epoch 20000: loss=0.2299
...
Epoch 190000: loss=0.1167
Epoch 200000: loss=0.1156

Training Results

correct_cnt = (y == (y_hat > .5)).sum()
total_cnt = float(y.size(0))

print("Accuracy: %.4f" %(correct_cnt/total_cnt))
---------------------------------------------------
Accuracy: 0.9649

df = pd.DataFrame(torch.cat([y, y_hat], dim=1).detach().numpy(), columns=["y", "y_hat"])
sns.histplot(df, x="y_hat", hue="y", bins=50, stat="probability")
plt.show()

4. MNIST

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import matplotlib.pyplot as plt
%matplotlib inline

Set CUDA

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Current device is", device)
----------------------------------------------------------------------
Current device is cpu

Dataset Load

import os
# os.listdir("../data")
train_data = datasets.MNIST(root="../data",   # data save path
                            train=True,       # train data
                            download=True,    # download on
                            transform=transforms.ToTensor())

test_data = datasets.MNIST(root="../data",    # data save path
                            train=False,      # test data
                            transform=transforms.ToTensor())

print("number of training data: ", len(train_data))
print("number of test data: ", len(test_data))
-----------------------------------------------------------------
number of training data:  60000
number of test data:  10000

Data Check

image, label = train_data[0]
image.shape, image.squeeze().shape
# the first dimension is the channel
------------------------------------------------
(torch.Size([1, 28, 28]), torch.Size([28, 28]))
plt.imshow(image.squeeze().numpy(), cmap="gray")
plt.title("label : %s" %label)
plt.show()

Mini-batch Configuration

batch_size = 50
learning_rate = 0.0001
epoch_num = 15

train_loader = torch.utils.data.DataLoader(dataset = train_data,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset = test_data,
                                           batch_size=batch_size,
                                           shuffle=True)

first_batch = next(iter(train_loader))
print("{:15s} | {:<25s} | {}".format("name", "type", "size"))
print("{:15s} | {:<25s} | {}".format("Num of Batch", "", len(train_loader)))
print("{:15s} | {:<25s} | {}".format("first_batch", str(type(first_batch)), len(first_batch)))
print("{:15s} | {:<25s} | {}".format("first_batch[0]", str(type(first_batch[0])), first_batch[0].shape))
print("{:15s} | {:<25s} | {}".format("first_batch[1]", str(type(first_batch[1])), first_batch[1].shape))
-----------------------------------------------------------------------------------------------------------
name            | type                      | size
Num of Batch    |                           | 1200
first_batch     | <class 'list'>            | 2
first_batch[0]  | <class 'torch.Tensor'>    | torch.Size([50, 1, 28, 28])
first_batch[1]  | <class 'torch.Tensor'>    | torch.Size([50])
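
With 60,000 training images and batch_size = 50, each epoch contains 60000 / 50 = 1200 batches, which matches "Num of Batch" above.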

Modeling

  • The first fully connected layer is set to nn.Linear(3136, 1000)
  • (28, 28) -> two rounds of MaxPool2d -> (7, 7), multiplied by the channel count: 7 * 7 * 64 = 3136 (see the check below)
  • The width of the fully connected layer, however, is generally chosen large enough to capture the features well
  • If it is too large the model overfits; if it is too small, it cannot capture all the information and cannot learn properly
  • The optimal size therefore has to be found through trial and error
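
A quick sanity check of the 3136 figure (a standalone sketch, not part of the model code):

# 28x28 input -> two 2x2 max-pools: 28 -> 14 -> 7
# conv2 outputs 64 channels, so the flattened size is 64 * 7 * 7
print(64 * (28 // 2 // 2) ** 2)   # 3136
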
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1, padding="same")
        self.conv2 = nn.Conv2d(32, 64, 3, 1, padding="same")
        self.dropout = nn.Dropout2d(0.25)

        self.fc1 = nn.Linear(3136, 1000)
        self.fc2 = nn.Linear(1000, 10)
        
    def forward(self, x):
        x = self.conv1(x)       # (N, 32, 28, 28)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)  # (N, 32, 14, 14)
        
        x = self.conv2(x)       # (N, 64, 14, 14)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)  # (N, 64, 7, 7)
        
        x = self.dropout(x)
        x = torch.flatten(x, 1) # (N, 3136)
        
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        
        return output
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# forward() already ends in log_softmax, so pair it with NLLLoss;
# CrossEntropyLoss would apply log_softmax a second time (harmless here, but redundant)
criterion = nn.NLLLoss()
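
Since forward() returns log-probabilities, their exponentials should sum to 1 per row; a quick standalone sketch verifying this (not part of the original code):

import torch
import torch.nn.functional as F

logits = torch.randn(2, 10)          # dummy scores
log_probs = F.log_softmax(logits, dim=1)
print(log_probs.exp().sum(dim=1))    # each row sums to 1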

Model Training

from time import time

model.train()
i = 1
for epoch in range(epoch_num):
    start_time_each_epoch = time()
    for data, target in train_loader:
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if i % 1000 == 0:
            # elapsed time is measured from the start of the current epoch
            print("Time: %.3f\tTrain Step: %d\tLoss: %.3f\t" % (time() - start_time_each_epoch, i, loss.item()))
        i+=1
---------------------------------------------------------------------------
Time: 81.436	Train Step: 1000	Loss: 0.191	
Time: 69.932	Train Step: 2000	Loss: 0.033	
Time: 53.119	Train Step: 3000	Loss: 0.014	
Time: 52.150	Train Step: 4000	Loss: 0.132	
Time: 22.078	Train Step: 5000	Loss: 0.192	
Time: 136.500	Train Step: 6000	Loss: 0.178	
Time: 114.181	Train Step: 7000	Loss: 0.048	
Time: 90.783	Train Step: 8000	Loss: 0.018	
Time: 68.979	Train Step: 9000	Loss: 0.020	
Time: 43.789	Train Step: 10000	Loss: 0.005	
Time: 20.056	Train Step: 11000	Loss: 0.055	
Time: 97.013	Train Step: 12000	Loss: 0.000	
Time: 78.883	Train Step: 13000	Loss: 0.027	
Time: 62.891	Train Step: 14000	Loss: 0.005	
Time: 47.510	Train Step: 15000	Loss: 0.003	
Time: 31.382	Train Step: 16000	Loss: 0.000	
Time: 15.918	Train Step: 17000	Loss: 0.001	
Time: 95.073	Train Step: 18000	Loss: 0.018

Model Evaluation

model.eval()
correct = 0

with torch.no_grad():   # no gradients needed during evaluation
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        prediction = output.argmax(dim=1)   # class with the highest log-probability
        correct += prediction.eq(target).sum()

print("Test set: Accuracy: %.2f" %(100.*correct / len(test_loader.dataset)))
------------------------------------------------------------------------------
Test set: Accuracy: 99.08
