CNN 맥스풀링 구현

IngCoding·2022년 6월 29일
1

머신러닝

목록 보기
33/34

내용출처 : 이수안컴퓨터연구소 Youtube

import cv2
import numpy as np 
import matplotlib.pyplot as plt
import urllib
import requests
from io import BytesIO

1. CNN 함수 정의

def url_to_image(url, gray=False):
  """Download the image at ``url`` and decode it into a NumPy array.

  Args:
    url: Address of the encoded image (anything ``urlopen`` accepts).
    gray: When truthy, decode as a single-channel grayscale image.
      Otherwise decode as colour and convert OpenCV's BGR order to RGB.

  Returns:
    The decoded image as returned by ``cv2.imdecode`` (after the
    BGR -> RGB conversion in the colour case).

  Raises:
    ValueError: If the downloaded bytes cannot be decoded as an image.
  """
  # A bare ``import urllib`` does not guarantee that the ``request``
  # submodule is loaded, so import it explicitly where it is used.
  import urllib.request

  # The context manager closes the HTTP response once the bytes are read.
  with urllib.request.urlopen(url) as resp:
    raw = np.asarray(bytearray(resp.read()), dtype="uint8")

  flag = cv2.IMREAD_GRAYSCALE if gray else cv2.IMREAD_COLOR
  image = cv2.imdecode(raw, flag)
  if image is None:
    # imdecode reports an undecodable buffer by returning None; fail loudly
    # here instead of handing None to the caller or to cvtColor.
    raise ValueError(f"could not decode image data from {url!r}")

  if not gray:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

  return image
# Image filter
def filtered_image(image, filter, output_size):
  """Slide ``filter`` over ``image`` and return the clipped window sums.

  Args:
    image: 2-D array that is sliced into square windows.
    filter: Square 2-D kernel; its first dimension sets the window size.
    output_size: Number of window positions along each axis.

  Returns:
    An ``(output_size, output_size)`` float array in which each entry is the
    sum of ``window * filter``, capped at 255.
  """
  result = np.zeros((output_size, output_size))
  window = filter.shape[0]

  for row in range(output_size):
    row_end = row + window
    for col in range(output_size):
      patch = image[row:row_end, col:(col + window)]
      total = np.sum(patch * filter)
      # Only the upper bound is capped; sums below zero are stored as is.
      result[row, col] = 255 if total > 255 else total

  return result
# Unrolls a batch of images into a 2-D array of flattened filter windows
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
  """Rearrange every filter-sized window of ``input_data`` into one row.

  Args:
    input_data: 4-D array unpacked as ``(N, C, H, W)``.
    filter_h: Window height.
    filter_w: Window width.
    stride: Step between consecutive windows.
    pad: Number of zeros added on each side of the two spatial axes.

  Returns:
    A 2-D array with ``N * out_h * out_w`` rows; each row holds one window
    flattened in ``(C, filter_h, filter_w)`` order.
  """
  batch, channels, height, width = input_data.shape
  rows_out = (height + 2*pad - filter_h) // stride + 1
  cols_out = (width + 2*pad - filter_w) // stride + 1

  # Zero-pad the spatial axes only; batch and channel axes are untouched.
  spatial_pad = (pad, pad)
  padded = np.pad(input_data, [(0, 0), (0, 0), spatial_pad, spatial_pad], 'constant')
  patches = np.zeros((batch, channels, filter_h, filter_w, rows_out, cols_out))

  for dy in range(filter_h):
    row_sel = slice(dy, dy + stride * rows_out, stride)
    for dx in range(filter_w):
      col_sel = slice(dx, dx + stride * cols_out, stride)
      # For kernel offset (dy, dx), gather that pixel of every window at once.
      patches[:, :, dy, dx, :, :] = padded[:, :, row_sel, col_sel]

  # Bring the window-position axes forward so that each row is one window.
  by_window = patches.transpose(0, 4, 5, 1, 2, 3)
  return by_window.reshape(batch * rows_out * cols_out, -1)

# Folds a 2-D array of flattened filter windows back into image layout
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
  """Scatter window rows back into an image, summing overlapping positions.

  Args:
    col: 2-D array with one row per window, each row flattened in
      ``(C, filter_h, filter_w)`` order.
    input_shape: Target shape ``(N, C, H, W)``.
    filter_h: Window height.
    filter_w: Window width.
    stride: Step between consecutive windows.
    pad: Padding that was applied on each spatial side; it is cropped away
      from the result.

  Returns:
    Array of shape ``(N, C, H, W)`` in which every pixel is the sum of all
    window entries that map onto it.
  """
  N, C, H, W = input_shape
  out_h = (H + 2*pad - filter_h) // stride + 1
  out_w = (W + 2*pad - filter_w) // stride + 1
  col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

  # The extra ``stride - 1`` rows/columns keep the strided slices below in
  # bounds when the windows do not tile the padded image exactly.
  img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
  for y in range(filter_h):
    y_max = y + stride * out_h
    for x in range(filter_w):
      x_max = x + stride * out_w
      # Accumulate (not assign): overlapping windows contribute to the same
      # pixel, and the values must come from ``col``, not from ``img``.
      img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

  return img[:, :, pad:H + pad, pad:W + pad]
def init_weight(num_filters, data_dim, kernel_size, stride=1, pad=0, weight_std=0.01):
  """Create randomly initialised filter weights and zero biases.

  Args:
    num_filters: Number of filters (first weight axis, length of the bias).
    data_dim: Size of the second weight axis.
    kernel_size: Side length of the square kernel.
    stride: Accepted but not used by this function.
    pad: Accepted but not used by this function.
    weight_std: Scale applied to the standard-normal samples.

  Returns:
    ``(weights, biases)`` where ``weights`` has shape
    ``(num_filters, data_dim, kernel_size, kernel_size)`` and ``biases`` is
    a zero vector of length ``num_filters``.
  """
  kernel_shape = (num_filters, data_dim, kernel_size, kernel_size)
  weights = np.random.randn(*kernel_shape) * weight_std
  biases = np.zeros(num_filters)
  return weights, biases
class conv2D:  # convolution layer
  """2-D convolution layer computed as one matrix product over im2col rows.

  Attributes are plain fields:
    W, b           filter weights, unpacked as (FN, C, FH, FW), and biases
    stride, pad    window step and zero padding forwarded to im2col/col2im
    input_data, col, col_W
                   values cached by forward() for use in backward()
    dW, db         gradients written by backward()
  """

  def __init__(self, W, b, stride=1, pad=0):
    self.W = W
    self.b = b
    self.stride = stride
    self.pad = pad

    # Forward-pass caches; the name ``col_W`` matches what forward() writes
    # and backward() reads.
    self.input_data = None
    self.col = None
    self.col_W = None

    self.dW = None
    self.db = None

  def forward(self, input_data):
    """Forward pass.

    Args:
      input_data: 4-D array unpacked as ``(N, C, H, W)``.

    Returns:
      Array of shape ``(N, FN, out_h, out_w)``.
    """
    FN, C, FH, FW = self.W.shape
    N, C, H, W = input_data.shape
    out_h = (H + 2*self.pad - FH) // self.stride + 1
    out_w = (W + 2*self.pad - FW) // self.stride + 1

    # One row per window, one column per filter.
    col = im2col(input_data, FH, FW, self.stride, self.pad)
    col_W = self.W.reshape(FN, -1).T

    out = np.dot(col, col_W) + self.b
    output = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

    self.input_data = input_data
    self.col = col
    self.col_W = col_W

    return output

  def backward(self, dout):
    """Backward pass; must be called after forward().

    Args:
      dout: Upstream gradient with the same layout as forward()'s output,
        ``(N, FN, out_h, out_w)``.

    Returns:
      Gradient with respect to the input, shaped like the cached input.
      ``self.dW`` and ``self.db`` are updated as a side effect.
    """
    FN, C, FH, FW = self.W.shape
    # Undo forward()'s final transpose and flatten to (windows, FN) so the
    # rows line up with the rows of ``self.col``.
    dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

    self.db = np.sum(dout, axis=0)
    self.dW = np.dot(self.col.T, dout)
    self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

    dcol = np.dot(dout, self.col_W.T)
    dx = col2im(dcol, self.input_data.shape, FH, FW, self.stride, self.pad)

    return dx

2. 풀링 층 (Pooling Layer)

class Pooling2D:
  """Max-pooling layer built on im2col/col2im.

  Attributes are plain fields:
    kernel_size    side length of the square pooling window
    stride, pad    window step and zero padding forwarded to im2col/col2im
    input_data     input cached by forward()
    arg_max        index of the maximum inside each window, cached by
                   forward() for use in backward()
  """

  def __init__(self, kernel_size=2, stride=1, pad=0):
    self.kernel_size = kernel_size
    self.stride = stride
    self.pad = pad

    self.input_data = None
    self.arg_max = None

  def forward(self, input_data):
    """Forward pass.

    Args:
      input_data: 4-D array unpacked as ``(N, C, H, W)``.

    Returns:
      Array of shape ``(N, C, out_h, out_w)`` holding each window's maximum.
    """
    N, C, H, W = input_data.shape
    # Include the padding so the size agrees with what im2col produces.
    # NOTE: im2col pads with zeros, so padded cells take part in the max.
    out_h = (H + 2*self.pad - self.kernel_size) // self.stride + 1
    out_w = (W + 2*self.pad - self.kernel_size) // self.stride + 1

    col = im2col(input_data, self.kernel_size, self.kernel_size, self.stride, self.pad)
    # Split each row per channel: one row per (window, channel) pair.
    col = col.reshape(-1, self.kernel_size * self.kernel_size)

    arg_max = np.argmax(col, axis=1)
    out = np.max(col, axis=1)
    output = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

    self.input_data = input_data
    self.arg_max = arg_max
    return output

  def backward(self, dout):
    """Backward pass; must be called after forward().

    Args:
      dout: Upstream gradient shaped like forward()'s output.

    Returns:
      Gradient with respect to the input, shaped like the cached input.
    """
    dout = dout.transpose(0, 2, 3, 1)

    pool_size = self.kernel_size * self.kernel_size
    dmax = np.zeros((dout.size, pool_size))
    # Route each upstream gradient to the position that won the max.
    dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
    dmax = dmax.reshape(dout.shape + (pool_size,))

    # Back to one row per window with all channels side by side.
    dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
    dx = col2im(dcol, self.input_data.shape, self.kernel_size, self.kernel_size, self.stride, self.pad)

    return dx

3. 풀링 층 테스트

  • 2차원 이미지
    • (Height, Width, 1)
# Download the sample picture as a grayscale image.
img_url = "https://upload.wikimedia.org/wikipedia/ko/thumb/2/24/Lenna.png/440px-Lenna.png"
image_gray = url_to_image(img_url, gray=True)

# Append an explicit trailing channel axis of length 1.
image_gray = np.reshape(image_gray, (image_gray.shape[0], -1, 1))
print("image.shape:", image_gray.shape)

# Move the channel axis to the front, then prepend a batch axis.
image_gray = image_gray.transpose(2, 0, 1)[np.newaxis]

plt.imshow(image_gray[0, 0], cmap='gray')
plt.show()
image.shape: (440, 440, 1)

# Eight 3x3 filters over the single-channel input.
W, b = init_weight(num_filters=8, data_dim=1, kernel_size=3)
conv = conv2D(W, b)
pool = Pooling2D(kernel_size=2, stride=2)  # pooling layer declaration
output = conv.forward(image_gray)
print("Conv size:", output.shape)
Conv size: (1, 8, 438, 438)
output = pool.forward(output) # max-pooling result
print("Pooling Layer size", output.shape) # height and width are halved
Pooling Layer size (1, 8, 219, 219)
# Inspect three of the pooled feature maps side by side
plt.figure(figsize=(10, 10))

for slot, (title, channel) in enumerate(
    (("Feature Map 8", 7), ("Feature Map 4", 3), ("Feature Map 1", 0)), start=1):
  plt.subplot(1, 3, slot)
  plt.title(title)
  plt.imshow(output[0, channel], cmap='gray')

plt.show()

4. 맥스풀링 결과 시각화

  • Color Image
  • conv --> maxpooling --> conv --> maxpooling
  • 시각화 과정
    • 2, 5, 9번째 필터의 특징 맵(feature map)을 통해 확인
# Download the same picture, this time decoded in colour.
img_url = "https://upload.wikimedia.org/wikipedia/ko/thumb/2/24/Lenna.png/440px-Lenna.png"
image_color = url_to_image(img_url, gray=False)
print("image.shape:", image_color.shape)

plt.imshow(image_color)
plt.show()

# Move the last axis (channels) to the front.
image_color = np.transpose(image_color, (2, 0, 1))
print("image.shape:", image_color.shape)
image.shape: (440, 440, 3)

image.shape: (3, 440, 440)
# Stack 15 copies of the image along a new leading batch axis.
batch_image_color = np.repeat(np.expand_dims(image_color, axis=0), repeats=15, axis=0)
print(batch_image_color.shape)
(15, 3, 440, 440)
# First convolution: ten 3x3 filters over the 3-channel batch.
W, b = init_weight(num_filters=10, data_dim=3, kernel_size=3)
conv1 = conv2D(W, b)
pool = Pooling2D(kernel_size=2, stride=2)
output1 = conv1.forward(batch_image_color)
print(output1.shape)
(15, 10, 438, 438)
# Show feature maps 2, 5 and 9 of the fifth image in the batch.
plt.figure(figsize=(10, 10))

for slot, (title, channel) in enumerate(
    (("Feature Map 2", 1), ("Feature Map 5", 4), ("Feature Map 9", 8)), start=1):
  plt.subplot(1, 3, slot)
  plt.title(title)
  plt.imshow(output1[4, channel], cmap='gray')

plt.show()

# Max-pool the first convolution's output.
output1 = pool.forward(output1)
print(output1.shape)
(15, 10, 219, 219)
# Second convolution: thirty 3x3 filters over the ten pooled maps.
W2, b2 = init_weight(num_filters=30, data_dim=10, kernel_size=3)
conv2 = conv2D(W2, b2)
pool = Pooling2D(kernel_size=2, stride=2)
output2 = conv2.forward(output1)
print(output2.shape)
(15, 30, 217, 217)
# Show feature maps 2, 5 and 9 of the second convolution (fifth image).
plt.figure(figsize=(10, 10))

for slot, (title, channel) in enumerate(
    (("Feature Map 2", 1), ("Feature Map 5", 4), ("Feature Map 9", 8)), start=1):
  plt.subplot(1, 3, slot)
  plt.title(title)
  plt.imshow(output2[4, channel], cmap='gray')

plt.show()

# Max-pool the second convolution's output.
output2 = pool.forward(output2)
print(output2.shape)
(15, 30, 108, 108)
# Show the same three feature maps again, now after max-pooling.
plt.figure(figsize=(10, 10))

for slot, (title, channel) in enumerate(
    (("Feature Map 2", 1), ("Feature Map 5", 4), ("Feature Map 9", 8)), start=1):
  plt.subplot(1, 3, slot)
  plt.title(title)
  plt.imshow(output2[4, channel], cmap='gray')

plt.show()

  • 풀링과정을 거치며 이미지가 더 축약되는 것을 볼 수 있다.
profile
Data & PM

0개의 댓글