CHAPTER 3. 신경망
def step_function(x):
    """Return 1 when the scalar ``x`` is strictly positive, otherwise 0."""
    # A conditional expression replaces the if/else statement. Only scalar
    # input works here because the comparison must yield a single truth value.
    return 1 if x > 0 else 0
import numpy as np
def step_function(x):
    """Apply the step function element-wise to a NumPy array.

    Args:
        x: NumPy array of numbers.

    Returns:
        Integer array shaped like ``x`` holding 1 where the element is
        strictly positive and 0 elsewhere.
    """
    y = x > 0  # boolean mask, True where the element is positive
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    return y.astype(int)  # convert the bool mask to integers
import numpy as np
import matplotlib.pyplot as plt
def step_function(x):
    """Apply the step function to a NumPy array or a scalar.

    Args:
        x: NumPy array (or scalar number) to threshold at zero.

    Returns:
        Integer NumPy array holding 1 where ``x`` is strictly positive and 0
        elsewhere; a scalar input yields a 0-d array.
    """
    # ``np.int`` was removed in NumPy 1.24; it was an alias of the builtin
    # ``int``, so passing ``int`` keeps the same dtype.
    return np.array(x > 0, dtype=int)
# Plot the step function for inputs from -5.0 up to (not including) 5.0
# in increments of 0.1.
x = np.arange(-5.0, 5.0, 0.1)
y = step_function(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1)  # y-axis range, slightly wider than the 0..1 outputs
plt.show()
import numpy as np
import matplotlib.pyplot as plt
def sigmoid_function(x):
    """Return the sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works on scalars and, through NumPy broadcasting, element-wise on arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
# Plot the sigmoid function for inputs from -5.0 up to (not including) 5.0
# in increments of 0.1.
x = np.arange(-5.0, 5.0, 0.1)
y = sigmoid_function(x)
plt.plot(x, y)
plt.ylim(-0.1, 1.1)  # y-axis range, slightly wider than the 0..1 outputs
plt.show()
def relu(x):
    """Return the rectified linear unit of ``x``: ``x`` where positive, else 0.

    Works on scalars and element-wise on NumPy arrays.
    """
    floor = 0
    # Argument order (floor first) is kept as in the original call.
    return np.maximum(floor, x)
# Plot ReLU for inputs from -5.0 up to (not including) 5.0 in increments of 0.1.
x = np.arange(-5.0, 5.0, 0.1)
y1 = relu(x)
plt.plot(x, y1)
plt.ylim(-1.1, 6.1)  # y-axis range chosen to cover the plotted outputs
plt.show()
import numpy as np
# 1-D array: number of dimensions and shape
a = np.array([1, 2, 3, 4])
print(a) # [1 2 3 4]
print(np.ndim(a)) # 1
print(a.shape) # (4,)
print(a.shape[0]) # 4
# 2-D array with 3 rows and 2 columns
b = np.array([[1, 2], [3, 4], [5, 6]])
print(b) # [[1 2] [3 4] [5 6]]
print(np.ndim(b)) # 2
print(b.shape) # (3, 2)
print(b.shape[0]) # 3
import numpy as np
# Matrix product of two 2x2 arrays.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
# Result is [[19 22] [43 50]]; it is not stored or printed, so it is only
# visible when this line is evaluated in an interactive session.
np.dot(a, b)
np.dot(): 입력이 1차원 배열이면 벡터의 내적을, 2차원 배열이면 행렬 곱을 계산
다차원 배열을 곱하려면 두 행렬의 대응하는 차원의 요소 수를 일치시켜야 함 (앞 행렬의 열 수 = 뒤 행렬의 행 수, 예: (3, 2) · (2, 4) → (3, 4))
import numpy as np
# Product of a 2-element vector and a 2x3 matrix gives a 3-element vector.
x = np.array([1, 2])
w = np.array([[1, 3, 5], [2, 4, 6]])
y = np.dot(x, w)
print(y) # [5 11 17]
import numpy as np
# Signal transfer from the input layer to layer 1
X = np.array([1.0, 0.5])
W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
B1 = np.array([0.1, 0.2, 0.3])
print(W1.shape) # (2, 3)
print(X.shape) # (2,)
print(B1.shape) # (3,)
# Weighted sum plus bias, then the sigmoid activation.
A1 = np.dot(X, W1) + B1
Z1 = sigmoid_function(A1)
print(A1) # [0.3, 0.7, 1.1]
print(Z1) # [0.57444252, 0.66818777, 0.75026011]
# Signal transfer from layer 1 to layer 2
W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
B2 = np.array([0.1, 0.2])
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid_function(A2)
# Signal transfer from layer 2 to the output layer.
# The output layer uses identity_function as its activation function.
def identity_function(x):
    """Return ``x`` unchanged (identity activation)."""
    return x
# Output layer: weighted sum of the layer-2 activations plus bias.
W3 = np.array([[0.1, 0.3], [0.2, 0.4]])
B3 = np.array([0.1, 0.2])
A3 = np.dot(Z2, W3) + B3
Y = identity_function(A3) # or simply Y = A3
# Initialise the weights and biases and store them in a dictionary.
# The dictionary holds the parameters (weights and biases) each layer needs.
def init_network():
    """Build the 3-layer example network with fixed weights and biases.

    Returns:
        dict mapping 'W1'/'b1', 'W2'/'b2', 'W3'/'b3' to NumPy arrays.
    """
    return {
        'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
        'b2': np.array([0.1, 0.2]),
        'W3': np.array([[0.1, 0.3], [0.2, 0.4]]),
        'b3': np.array([0.1, 0.2]),
    }
# Convert an input signal into the network output.
# The signal travels forward, from input towards output (forward propagation).
def forward(network, x):
    """Run ``x`` through the three layers stored in ``network``.

    Args:
        network: mapping with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: input signal passed to the first layer.

    Returns:
        Output of the third layer after the identity activation.
    """
    weights = network['W1'], network['W2'], network['W3']
    biases = network['b1'], network['b2'], network['b3']

    signal = x
    # Layers 1 and 2: weighted sum plus bias, then the sigmoid activation.
    for weight, bias in zip(weights[:2], biases[:2]):
        signal = sigmoid_function(np.dot(signal, weight) + bias)

    # Output layer: weighted sum plus bias, then the identity activation.
    return identity_function(np.dot(signal, weights[2]) + biases[2])
# Build the example network and run one forward pass.
network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y) # [ 0.31682708 0.69627909]
def softmax(a):
    """Return the softmax of ``a``: exponentials divided by their total.

    Note: ``np.exp`` is applied to ``a`` directly, so very large entries
    overflow; this direct form is only reliable for moderately sized inputs.
    """
    exponentials = np.exp(a)
    return exponentials / np.sum(exponentials)
소프트맥스 함수는 지수 함수를 사용하는데, 이로 인해 오버플로가 발생하여 수치가 불안정해질 수 있는 문제점이 있음
오버플로 문제를 해결하기 위해 소프트맥스 함수 개선 (입력의 최댓값을 빼도 결과는 변하지 않음)
# Inputs large enough that exp() overflows when applied directly.
a = np.array([1010, 1000, 990])
# Direct evaluation, np.exp(a) / np.sum(np.exp(a)), overflows and yields nan,
# so it is left out here.
c = np.max(a) # c = 1010 (the maximum)
print(a-c) # a-c = array([0, -10, -20])
# Subtracting the maximum first keeps every exponent at or below zero.
print(np.exp(a-c) / np.sum(np.exp(a-c)))
def softmax(a, axis=None):
    """Return the softmax of ``a``, guarded against overflow.

    The maximum is subtracted before exponentiation, so every exponent is at
    most zero and ``np.exp`` cannot overflow; the result is unchanged.

    Args:
        a: scalar, sequence or NumPy array of scores.
        axis: axis along which to normalise. The default ``None`` normalises
            over all elements together, as before. Pass e.g. ``axis=1`` (or
            ``-1``) to normalise each row of a 2-D batch independently.

    Returns:
        Array shaped like ``a`` whose entries sum to 1 along ``axis``.
    """
    # keepdims is only needed for a per-axis reduction, so the reduced values
    # broadcast back against ``a``; with axis=None the reductions are scalars.
    keepdims = axis is not None
    c = np.max(a, axis=axis, keepdims=keepdims)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=keepdims)
    return exp_a / sum_exp_a
import sys, os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist
# Load the MNIST train/test splits (flatten=True, normalize=False) and
# print the shape of each array.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
print(x_train.shape) # (60000, 784)
print(t_train.shape) # (60000,)
print(x_test.shape) # (10000, 784)
print(t_test.shape) # (10000,)
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
# Pillow's module is ``Image`` (capital I); the lowercase name cannot be
# imported, and img_show refers to ``Image`` below.
from PIL import Image


def img_show(img):
    """Open ``img`` in the system image viewer.

    Args:
        img: array of pixel values; it is cast to ``uint8`` before being
            handed to Pillow.
    """
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()
# Load MNIST with flattened, un-normalised images and display the first
# training image together with its label.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
img = x_train[0]
label = t_train[0]
print(label)
print(img.shape)  # shape of the flattened image
img = img.reshape(28, 28)  # reshape the flat vector to 28x28 for display
print(img.shape)
img_show(img)
import numpy as np
import pickle
from dataset.mnist import load_mnist
from common.functions import sigmoid, softmax
def get_data():
    """Load MNIST and return only the test split.

    The loader is called with ``normalize=True``, ``flatten=True`` and
    ``one_hot_label=False``.

    Returns:
        Tuple ``(x_test, t_test)`` of test images and test labels.
    """
    (_train_images, _train_labels), (test_images, test_labels) = load_mnist(
        normalize=True, flatten=True, one_hot_label=False
    )
    return test_images, test_labels
def init_network():
    """Load the network parameters stored in ``sample_weight.pkl``.

    The file is opened relative to the current working directory.

    Returns:
        The object unpickled from the file.
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only use a weight file that comes from a trusted source.
    with open("sample_weight.pkl", 'rb') as weight_file:
        return pickle.load(weight_file)
def predict(network, x):
    """Run ``x`` through the three-layer network and return softmax outputs.

    Args:
        network: mapping with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
        x: input passed to the first layer.

    Returns:
        Result of ``softmax`` applied to the third layer's weighted sum.
    """
    weights = network['W1'], network['W2'], network['W3']
    biases = network['b1'], network['b2'], network['b3']

    signal = x
    # Layers 1 and 2: weighted sum plus bias, then the sigmoid activation.
    for weight, bias in zip(weights[:2], biases[:2]):
        signal = sigmoid(np.dot(signal, weight) + bias)

    # Output layer: weighted sum plus bias, then softmax.
    return softmax(np.dot(signal, weights[2]) + biases[2])
# Measure accuracy by classifying the test inputs one at a time.
x, t = get_data()
network = init_network()
accuracy_cnt = 0
for i, sample in enumerate(x):
    y = predict(network, sample)
    p = np.argmax(y)  # index of the largest output
    if p == t[i]:
        accuracy_cnt += 1
accuracy = float(accuracy_cnt) / len(x)
print("Accuracy: " + str(accuracy))
# Measure accuracy by classifying the test inputs in batches of 100.
x, t = get_data()
network = init_network()
batch_size = 100
accuracy_cnt = 0
for i in range(0, len(x), batch_size):
    batch_end = i + batch_size
    x_batch = x[i:batch_end]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)  # index of the largest output in each row
    accuracy_cnt += np.sum(p == t[i:batch_end])
accuracy = float(accuracy_cnt) / len(x)
print("Accuracy: " + str(accuracy))