Deep Learning - chap 9 - 1

심준보·2023년 6월 12일

컴퓨터 비전을 위한 고급 딥러닝

import os

# Folder holding the input photos and folder holding the target annotations.
input_dir = "images/"
target_dir = "annotations/trimaps/"

# Gather every ".jpg" file; sorted() puts the paths in alphabetical order.
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith(".jpg")
)

# Gather every ".png" file, leaving out hidden files whose names start
# with a dot.
target_paths = sorted(
    os.path.join(target_dir, fname)
    for fname in os.listdir(target_dir)
    if not fname.startswith(".") and fname.endswith(".png")
)

sorted -> 알파벳 순서 정렬

os.path.join(input_dir,fname) -> 두 개의 경로를 연결하여 새로운 경로를 생성하는 함수

if fname.endswith(".jpg") -> 파일 이름이 'jpg'로 끝나는지 확인

import matplotlib.pyplot as plt
from tensorflow.keras.utils import load_img, img_to_array  

# Hide the axes, then load the photo at index 9 (the 10th path) and draw it.
plt.axis("off")
plt.imshow(load_img(input_img_paths[9]))

import matplotlib.pyplot as plt -> 시각화 라이브러리 불러오기

plt.axis("off") -> 축 표시 안하기

plt.imshow(load_img(input_img_paths[9])) -> 10번째 이미지 시각화하기

def display_target(target_array):
    """Draw channel 0 of a label mask with matplotlib, axes hidden.

    The mask is cast to uint8, shifted down by 1 and multiplied by 127
    before plotting, which spreads neighbouring integer labels far apart in
    intensity (presumably labels 1, 2, 3 become 0, 127, 254 -- confirm
    against the dataset).
    """
    shifted = target_array.astype("uint8") - 1
    normalized_array = shifted * 127
    plt.axis("off")
    plt.imshow(normalized_array[:, :, 0])

# Load the annotation at index 9 (the 10th path) as a single-channel
# grayscale image, convert it to an array, and display it.
img = img_to_array(load_img(target_paths[9], color_mode="grayscale"))
display_target(img)

plt.imshow(normalized_array[:,:,0]) -> 첫 번째 채널을 시각화한다.

img = img_to_array(load_img(target_paths[9], color_mode="grayscale"))

-> 10번째 이미지를 그레이스케일로 로드한 뒤, 배열 형태로 변환하여 img 변수에 저장

import numpy as np
import random

# Every image is resized to this size when it is loaded.
img_size = (200, 200)

# Shuffle both path lists with the same seed: two generators seeded
# identically produce the same permutation, so (as long as the two lists
# have the same length) each photo stays aligned with its annotation.
random.Random(1337).shuffle(target_paths)
random.Random(1337).shuffle(input_img_paths)

# Total number of samples.
num_imgs = len(input_img_paths)
def path_to_input_image(path):
    """Load the image at `path`, resized to `img_size`, as an array."""
    resized = load_img(path, target_size=img_size)
    return img_to_array(resized)

def path_to_target(path):
    """Load the annotation at `path` as a uint8 label array.

    The file is read as a single-channel grayscale image resized to
    `img_size`, cast to uint8, and every value is decreased by 1
    (presumably so that labels start at 0 -- confirm against the dataset).
    """
    mask = load_img(path, target_size=img_size, color_mode="grayscale")
    return img_to_array(mask).astype("uint8") - 1

import random -> "random" 모듈을 가져온다. 무작위 섞는 기능 제공

random.Random(1337).shuffle(input_img_paths) -> input_img_paths 리스트를 섞기 위해 'random.Random(1337)'객체를 생성한 후 'shuffle'함수를 호출

def path_to_input_image(path):
    return img_to_array(load_img(path, target_size=img_size))

-> 이미지를 img_size 크기로 조정하여 불러온 후 img_to_array를 이용해서 배열 형태로 변환

def path_to_target(path):
   img = img_to_array(
       load_img(path, target_size=img_size, color_mode="grayscale"))
   img = img.astype("uint8") - 1
   return img

# Preallocate one array for all photos (3 channels, float32) and one for all
# annotations (1 channel, uint8).
input_imgs = np.zeros((num_imgs,) + img_size + (3,), dtype="float32")
targets = np.zeros((num_imgs,) + img_size + (1,), dtype="uint8")

# Fill the preallocated arrays. Without this step both arrays stay all-zero
# and everything downstream (split, training) would run on blank data.
for i, (img_path, target_path) in enumerate(zip(input_img_paths, target_paths)):
    input_imgs[i] = path_to_input_image(img_path)
    targets[i] = path_to_target(target_path)

-> (3,) : 3개의 채널로 이루어져 있다.


# Number of samples held out for validation.
num_val_samples = 1000

# The last `num_val_samples` entries form the validation set ...
val_input_imgs = input_imgs[-num_val_samples:]
val_targets = targets[-num_val_samples:]

# ... and everything before them is used for training.
train_input_imgs = input_imgs[:-num_val_samples]
train_targets = targets[:-num_val_samples]

train_input_imgs = input_imgs[:-num_val_samples]
-> input_imgs에서 마지막 1000개(num_val_samples)를 제외한 앞부분을 훈련 데이터로 할당

val_input_imgs = input_imgs[-num_val_samples:]
-> input_imgs에서 마지막 1000개(num_val_samples)를 검증 데이터로 할당

def get_model(img_size, num_classes):
    """Build a convolutional encoder-decoder for per-pixel classification.

    Args:
        img_size: Tuple with the spatial size of the input images; a
            3-channel axis is appended to it to form the input shape.
        num_classes: Number of filters of the final layer, i.e. the number
            of classes scored for every pixel.

    Returns:
        An uncompiled `keras.Model` whose output holds, for each pixel, a
        softmax distribution over `num_classes`.
    """
    # Imported locally because the file never imports `keras` or `layers`
    # at module level; without these the function fails with NameError.
    from tensorflow import keras
    from tensorflow.keras import layers

    # (3,) is appended so that the input has 3 channels.
    inputs = keras.Input(shape=img_size + (3,))
    # Multiply every input value by 1/255 (maps 0-255 pixel values to 0-1).
    x = layers.Rescaling(1./255)(inputs)

    # Encoder. Arguments: 64 filters, 3x3 kernel. padding="same" keeps the
    # size unchanged only when the stride is 1; with strides=2 the spatial
    # size is halved. Three stride-2 layers downsample by 8x overall.
    x = layers.Conv2D(64, 3, strides=2, activation="relu", padding="same")(x)
    x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
    x = layers.Conv2D(128, 3, strides=2, activation="relu", padding="same")(x)
    x = layers.Conv2D(128, 3, activation="relu", padding="same")(x)
    x = layers.Conv2D(256, 3, strides=2, padding="same", activation="relu")(x)
    x = layers.Conv2D(256, 3, activation="relu", padding="same")(x)

    # Decoder. Each stride-2 transposed convolution doubles the spatial
    # size, undoing the three downsampling steps above (the output matches
    # the input size when img_size is divisible by 8).
    x = layers.Conv2DTranspose(256, 3, activation="relu", padding="same")(x)
    x = layers.Conv2DTranspose(256, 3, activation="relu", padding="same", strides=2)(x)
    x = layers.Conv2DTranspose(128, 3, activation="relu", padding="same")(x)
    x = layers.Conv2DTranspose(128, 3, activation="relu", padding="same", strides=2)(x)
    x = layers.Conv2DTranspose(64, 3, activation="relu", padding="same")(x)
    x = layers.Conv2DTranspose(64, 3, activation="relu", padding="same", strides=2)(x)

    # num_classes filters with a 3x3 kernel; softmax turns the per-pixel
    # scores into probabilities over the classes.
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    model = keras.Model(inputs, outputs)
    return model

inputs = keras.Input(shape=img_size + (3,))
-> img_size와 채널 수 3을 가지는 입력 텐서를 생성합니다. 이는 모델의 입력 이미지 크기와 채널 수를 나타냅니다.

x = layers.Rescaling(1./255)(inputs)

-> 입력 값을 0과 1 사이로 정규화하기 위해 Rescaling 레이어를 사용합니다. 입력 이미지의 픽셀 값 범위가 0-255이므로, 이를 0-1 범위로 조정합니다.

x = layers.Conv2D(64, 3, strides=2, activation="relu", padding="same")(x)

-> 2D 합성곱(Convolution) 레이어를 사용하여 입력에 64개의 필터를 적용합니다. 필터 크기는 3x3이며, 스트라이드(stride)는 2로 설정되어 입력의 크기를 절반으로 줄입니다. 활성화 함수로 ReLU를 사용합니다. 패딩(padding)을 "same"으로 설정하면 스트라이드가 1일 때는 입력과 출력 크기가 동일하게 유지되고, 여기처럼 스트라이드가 2일 때는 출력 크기가 입력의 절반(올림)이 됩니다.

-> 이어지는 코드에서는 일련의 Conv2D 및 Conv2DTranspose 레이어를 사용하여 인코더-디코더 구조를 구성합니다. U-Net과 비슷한 형태이지만, 인코더와 디코더를 잇는 스킵 연결(skip connection)이 없으므로 엄밀한 의미의 U-Net은 아닙니다. 인코더는 입력 이미지를 점차 다운샘플링하여 특성 맵을 추출하는 역할을 수행하고, 디코더는 추출된 특성 맵을 업샘플링하여 입력 이미지와 동일한 크기의 세그멘테이션 맵을 생성합니다.

outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

->입력에 num_classes 개수의 필터를 적용하는 2D 합성곱 레이어를 생성합니다. 필터 크기는 3x3이며, 활성화 함수로 소프트맥스(softmax) 함수를 사용합니다.

model = keras.Model(inputs, outputs)

->입력과 출력을 지정하여 케라스 모델을 생성합니다.

Compile and fit

from tensorflow import keras

# Build the network before compiling it; `model` is not created anywhere
# else in this file. NOTE(review): num_classes=3 assumes the trimap
# annotations hold three labels (0, 1, 2 after path_to_target subtracts 1)
# -- confirm against the dataset.
model = get_model(img_size=img_size, num_classes=3)

# The targets are integer class indices rather than one-hot vectors, hence
# the sparse variant of categorical cross-entropy.
model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy")

# Keep only the best model seen so far on disk.
callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.keras",
                                    save_best_only=True)
]

# Train for 50 epochs in batches of 64, evaluating on the held-out
# validation arrays after every epoch.
history = model.fit(train_input_imgs, train_targets,
                    epochs=50,
                    callbacks=callbacks,
                    batch_size=64,
                    validation_data=(val_input_imgs, val_targets))

"sparse_categorical_crossentropy"

-> 원-핫 인코딩된 레이블이 아닌 정수 형태의 레이블을 처리하는데 유용

callbacks

callbacks = [
    keras.callbacks.ModelCheckpoint("oxford_segmentation.keras",
                                    save_best_only=True)
]

ModelCheckpoint -> 모델의 체크포인트(checkpoint)를 저장하는 역할

"oxford_segmentation.keras"
-> 체크포인트 파일의 경로와 이름을 지정합니다. 여기서는 "oxford_segmentation.keras"라는 파일로 체크포인트를 저장할 것입니다.

save_best_only=True
-> 이 인자를 True로 설정하면, 모니터링하는 지표(기본값은 검증 손실 val_loss)가 이전 최고 기록보다 개선되었을 때에만 체크포인트를 저장합니다. 이를 통해 훈련 중 최상의 모델을 유지할 수 있습니다.

epoch 및 loss값 지정 후 그래프 생성

# Per-epoch losses recorded in the training history.
loss = history.history["loss"]
val_loss = history.history["val_loss"]
# Epoch numbers for the x axis, counted from 1.
epochs = range(1, len(history.history["loss"]) + 1)

plt.figure()
plt.title("Training and validation loss")
# "bo" draws blue dots, "b" draws a solid blue line.
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
# The legend uses the labels given to the two plot calls above.
plt.legend()

epochs = range(1, len(history.history["loss"]) + 1)

-> x축에 표시될 에포크(epoch) 값들을 생성합니다. len(history.history["loss"])는 훈련 과정에서 발생한 손실 값들의 개수를 나타냅니다. 이를 range 함수에 적용하여 1부터 해당 개수까지의 정수를 생성

loss = history.history["loss"]

->훈련 과정에서 기록된 손실 값들을 가져온다

plt.plot(epochs, loss, "bo", label="Training loss")

-> 훈련 데이터에 대한 손실 값을 그래프로 표현합니다. x축은 에포크 값들이고, y축은 손실 값들입니다. "bo"는 파란색 점 형태로 표시됨을 의미합니다. 레이블은 "Training loss"로 지정됩니다.

plt.legend()

-> 그래프의 범례를 추가한다 , 그래프의 선이나 점의 의미를 설명하는 것

# Add a leading batch axis to the single image, run the model on that
# one-element batch, and take the first (only) prediction.
# NOTE(review): `test_image` is not defined in the visible part of the
# file -- confirm where it comes from.
mask = model.predict(np.expand_dims(test_image, 0))[0]

-> np.expand_dims(test_image, 0)는 선택한 이미지에 차원을 추가하여 배치 차원을 만듭니다.