Deeplearning -chap 9-3

심준보·2023년 6월 12일

중간 활성화 시각화

from tensorflow import keras
import numpy as np

# Fetch the sample cat picture from `origin` with keras.utils.get_file and
# keep the returned local file path in img_path.
img_path = keras.utils.get_file(
    fname="cat.jpg",
    origin="https://img-datasets.s3.amazonaws.com/cat.jpg")

def get_img_array(img_path, target_size):
    """Load an image file and return it as an array with a leading axis.

    Args:
        img_path: Path of the image file to load.
        target_size: Size forwarded to ``keras.utils.load_img``; the image
            is resized to it while loading.

    Returns:
        The image converted to an array, with one extra axis inserted at
        position 0.
    """
    img = keras.utils.load_img(
        img_path, target_size=target_size)
    array = keras.utils.img_to_array(img)
    # Insert a new first axis so the single image looks like a batch of one.
    array = np.expand_dims(array, axis=0)
    return array

# Load the downloaded picture resized to 180x180.
img_tensor = get_img_array(img_path, target_size=(180, 180))

img_path = keras.utils.get_file(fname="cat.jpg", origin="https://img-datasets.s3.amazonaws.com/cat.jpg")
-> get_file 함수를 사용하여 주어진 URL에서 이미지 파일을 다운로드합니다. 다운로드된 이미지 파일의 경로를 img_path 변수에 저장
def get_img_array(img_path, target_size)
->get_img_array 함수를 정의합니다. 이 함수는 이미지 파일의 경로와 목표 크기(target_size)를 입력으로 받습니다.
img = keras.utils.load_img(img_path, target_size=target_size)
-> load_img 함수를 사용하여 이미지 파일을 로드합니다. target_size를 지정하여 이미지를 해당 크기로 조정합니다. 로드된 이미지는 img 변수에 저장
array = np.expand_dims(array, axis=0)
-> 이 경우, axis=0을 지정하여 배열의 첫 번째 차원을 추가

층 활성화를 반환하는 모델 만들기

from tensorflow.keras import layers

# Collect the output tensor and the name of every Conv2D / MaxPooling2D
# layer of `model`.
# NOTE(review): `model` is not defined earlier in this post; it must already
# exist when this snippet runs — confirm which model is intended.
layer_outputs = []
layer_names = []
for layer in model.layers:
    if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D)):
        layer_outputs.append(layer.output)
        layer_names.append(layer.name)
# Model that maps the original input to all collected layer outputs at once.
activation_model = keras.Model(inputs=model.input, outputs=layer_outputs)

if isinstance(layer, (layers.Conv2D, layers.MaxPooling2D))
-> 현재 레이어가 Conv2D 또는 MaxPooling2D 레이어인지 확인

layer_outputs.append(layer.output)
-> 선택된 레이어의 출력을 layer_outputs 리스트에 추가
activations = activation_model.predict(img_tensor)
-> 각 레이어의 출력을 예측합니다. 이를 통해 입력 이미지에 대한 각 레이어의 활성화 값을 얻을 수 있습니다.

# activations follows the order of layer_outputs, so index 0 is the first
# collected layer.
first_layer_activation = activations[0]
print(first_layer_activation.shape)
-> 리스트에서 첫 번째 레이어의 활성화 출력을 선택하고, 그 출력의 형태(shape)를 출력하는 부분

import matplotlib.pyplot as plt
# Draw the channel at index 5 (zero-based) of the first collected layer for
# the sample at batch index 0.
plt.matshow(first_layer_activation[0, :, :, 5], cmap="viridis")  # viridis colormap
plt.show()
-> 첫 번째 레이어의 활성화 출력에서 인덱스 5(0부터 세므로 여섯 번째) 채널을 선택하여 시각화하는 부분
-> 모든 공간 차원(행과 열)에 대해 5번째 채널의 값을 선택하여 매트릭스로 표현
matshow
-> 매트릭스를 시각화하는 함수
cmap="viridis"
-> 색상 맵을 지정하는 매개변수로, "viridis" 색상 맵을 사용하여 시각화

모든 층의 활성화에 있는 전체 채널 시각화하기

# Number of channel tiles placed side by side in one row of the grid.
images_per_row = 16
for layer_name, layer_activation in zip(layer_names, activations):
    # The last axis indexes the channels (feature maps) of this layer.
    n_features = layer_activation.shape[-1]
    # Axis 1 is used as the side length of every tile in both directions,
    # so the feature maps are treated as square.
    size = layer_activation.shape[1]
    # Integer division: channels beyond the last full group of
    # images_per_row are not drawn.
    n_cols = n_features // images_per_row
    # Canvas for all tiles: each tile takes `size` pixels plus a 1-pixel
    # gap, with no gap after the last tile in either direction.
    display_grid = np.zeros(((size + 1) * n_cols - 1,
                             images_per_row * (size + 1) - 1))
    for col in range(n_cols):
        for row in range(images_per_row):
            channel_index = col * images_per_row + row
            # Work on a copy so the in-place arithmetic below does not write
            # into layer_activation.
            channel_image = layer_activation[0, :, :, channel_index].copy()
            # Channels whose values sum to zero are left as they are.
            if channel_image.sum() != 0:
                # Standardize, then rescale so values are centred on 128
                # with a spread of 64 before clipping to the 0-255 range.
                channel_image -= channel_image.mean()
                channel_image /= channel_image.std()
                channel_image *= 64
                channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype("uint8")
            # Despite the names, `col` selects the position along the first
            # (vertical) axis of the grid and `row` along the second.
            display_grid[
                col * (size + 1): (col + 1) * size + col,
                row * (size + 1) : (row + 1) * size + row] = channel_image
    # Figure size is the grid size divided by the tile side length.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.axis("off")
    plt.imshow(display_grid, aspect="auto", cmap="viridis")

n_features = layer_activation.shape[-1]
-> 해당 레이어의 활성화 출력에서 사용된 특징 맵의 개수를 n_features 변수에 저장
size = layer_activation.shape[1]
-> 특징 맵의 크기를 size 변수에 저장
n_cols = n_features // images_per_row
-> 한 줄에 images_per_row개씩 배치할 때 필요한 줄 수를 계산 (변수 이름은 n_cols이지만 그리드의 세로 방향 칸 수로 사용됩니다)
display_grid = np.zeros(((size + 1) * n_cols - 1, images_per_row * (size + 1) - 1))
-> 빈 그리드를 생성합니다. 각 이미지는 size × size 크기이고 이미지 사이에 1픽셀 간격을 두므로, 한 칸이 size + 1 픽셀을 차지합니다(마지막 칸 뒤에는 간격이 없어 1을 뺍니다).
channel_image = layer_activation[0, :, :, channel_index].copy()
-> 현재 채널의 활성화 맵을 선택
if channel_image.sum() != 0:
-> 선택한 활성화 맵의 합이 0이 아닌 경우에만 조정 작업을 수행
channel_image -= channel_image.mean()
-> 평균 값을 뺀다
channel_image /= channel_image.std()
-> 표준 편차로 나눕니다.
channel_image *= 64
-> 값을 64배 확장
channel_image += 128
-> 값을 128만큼 이동
channel_image = np.clip(channel_image, 0, 255).astype("uint8")
-> 픽셀 값을 0에서 255 사이로 클리핑하고 uint8 자료형으로 변환
-> 배열의 모든 요소들을 0에서 255사이로 클리핑하여 값의 범위를 제한한다.
scale = 1. / size
-> 그리드의 크기를 조정하기 위한 스케일을 계산합니다.

plt.grid(False)
-> 그래프의 격자를 비활성화한다.

Xception 합성곱 기반 모델 만들기

# Xception loaded with the "imagenet" weights and with include_top=False,
# i.e. without the classifier layers on top of the convolutional base.
model = keras.applications.xception.Xception(
    weights="imagenet",
    include_top=False)

keras.applications.xception.Xception
-> Xception 모델을 생성합니다.

weights="imagenet"
-> ImageNet 데이터셋으로 사전 훈련된 가중치를 사용합니다. 이는 이미지 분류와 관련된 많은 다양한 카테고리를 학습한 모델 가중치를 불러올 수 있게 해줍니다.
include_top=False
-> 모델의 상단 부분에 위치한 fully connected layer를 포함하지 않고, 특성 추출을 위한 CNN 부분만을 가져옵니다

특성 추출 모델 만들기

# Model that returns the output of the "block3_sepconv1" layer of `model`
# for a given input.
layer_name = "block3_sepconv1"
layer = model.get_layer(name=layer_name)
feature_extractor = keras.Model(inputs=model.input, outputs=layer.output)

model.get_layer(name=layer_name)
-> model에서 layer_name에 해당하는 레이어를 가져옵니다

keras.applications.xception.preprocess_input(img_tensor)
-> preprocess_input : 전처리를 수행하여 입력 이미지를 적절한 형식으로 변환

import tensorflow as tf

def compute_loss(image, filter_index):
    """Return the mean activation of one filter of ``feature_extractor``.

    Args:
        image: Input passed to ``feature_extractor``.
        filter_index: Index along the last axis of the activation selecting
            the filter to score.

    Returns:
        The mean of the selected filter's activation as a scalar tensor.
    """
    activation = feature_extractor(image)
    # Drop two positions on every side of axes 1 and 2 before averaging
    # (presumably to keep border artifacts out of the loss — confirm).
    filter_activation = activation[:, 2:-2, 2:-2, filter_index]
    return tf.reduce_mean(filter_activation)

filter_activation = activation[:, 2:-2, 2:-2, filter_index]
-> 특성 맵에서 [:, 2:-2, 2:-2, filter_index]를 사용하여 특정 필터 인덱스에 해당하는 부분을 추출

tf.reduce_mean(filter_activation)
-> 평균값을 계산하여 손실을 정의

확률적 경사 상승법을 사용한 손실 최대화

@tf.function
def gradient_ascent_step(image, filter_index, learning_rate):
    """Move ``image`` one step in the direction that increases the loss.

    Args:
        image: Current image tensor.
        filter_index: Filter whose mean activation (``compute_loss``) is
            being increased.
        learning_rate: Multiplier applied to the normalized gradient.

    Returns:
        The image after adding ``learning_rate`` times the L2-normalized
        gradient of the loss with respect to the image.
    """
    with tf.GradientTape() as tape:
        # The image is watched explicitly so its gradient can be requested.
        tape.watch(image)
        loss = compute_loss(image, filter_index)
    # Only the direction of the gradient is kept; its magnitude is
    # normalized away so the step length is set by learning_rate.
    direction = tf.math.l2_normalize(tape.gradient(loss, image))
    image += learning_rate * direction
    return image

@tf.function
-> 데코레이터는 TensorFlow 함수로써의 동작을 최적화하기 위해 사용
tf.GradientTape()
-> gradient를 계산할 수 있는 tape을 생성
loss = compute_loss(image, filter_index)
-> 호출하여 손실을 계산
image = tf.random.uniform(
minval=0.4,
maxval=0.6,
shape=(1, img_width, img_height, 3))
-> tf.random.uniform를 사용하여 초기 이미지를 생성
gradient_ascent_step
-> gradient ascent를 수행하여 이미지를 손실을 최대화하는 방향으로 업데이트

텐서를 이미지로 변환하기 위한 유틸리티 함수

def deprocess_image(image):
    """Convert a raw image array into a displayable uint8 image.

    The values are standardized (mean subtracted, divided by the standard
    deviation), rescaled to be centred on 128 with a spread of 64, clipped
    to [0, 255], cast to uint8 and finally cropped by 25 pixels on every
    side of the first two axes.

    The work is done on a copy, so the caller's array is never modified.
    Non-floating inputs are converted to float32 first, and a constant
    image (standard deviation 0) yields a uniform value of 128 instead of
    NaNs from a division by zero.

    Args:
        image: Array-like with at least three axes; the first two are the
            ones that get cropped.

    Returns:
        A new uint8 array with 25 pixels removed from each border of the
        first two axes.
    """
    # Copy up front: every arithmetic step below is in-place.
    image = np.array(image, copy=True)
    if not np.issubdtype(image.dtype, np.floating):
        # In-place float arithmetic is not possible on integer arrays.
        image = image.astype("float32")
    image -= image.mean()
    std = image.std()
    if std > 0:
        image /= std
    image *= 64
    image += 128
    image = np.clip(image, 0, 255).astype("uint8")
    image = image[25:-25, 25:-25, :]
    return image

입력된 이미지에서 평균을 뺍니다. 이는 이미지를 중심에 정렬하기 위한 작업입니다.
이미지를 표준편차로 나눕니다. 이는 이미지의 명암을 조정하는 작업입니다.
이미지를 64배 확장하고 128을 더합니다. 이는 이미지의 명암 범위를 확장하는 작업입니다.
이미지를 0과 255 사이의 값으로 클리핑하고 "uint8" 자료형으로 변환합니다. 이는 이미지를 정수 픽셀 값으로 변환하는 작업입니다.
이미지의 가장자리 25픽셀을 제거합니다. 이는 이미지의 가장자리에 나타나는 노이즈를 제거하는 작업입니다.
최종적으로 후처리된 이미지를 반환합니다.

# Generate and post-process one pattern image per filter index 0..63.
# NOTE(review): generate_filter_pattern, img_width and img_height are used
# below but are not defined in the visible part of this post — confirm they
# are defined before this snippet runs.
all_images = []
for filter_index in range(64):
    print(f"{filter_index}번 필터 처리중")
    image = deprocess_image(
        generate_filter_pattern(filter_index)
    )
    all_images.append(image)

# Layout of the n x n grid: 64 images fill it exactly when n is 8.
margin = 5
n = 8
# deprocess_image crops 25 pixels from each border, hence the 25 * 2.
cropped_width = img_width - 25 * 2
cropped_height = img_height - 25 * 2
# n tiles plus (n - 1) margins between them in each direction.
width = n * cropped_width + (n - 1) * margin
height = n * cropped_height + (n - 1) * margin
# The first axis is sized by `width` and the second by `height`; the
# indexing below uses the same convention.
stitched_filters = np.zeros((width, height, 3))

for i in range(n):
    for j in range(n):
        image = all_images[i * n + j]
        # Tile (i, j) starts at a multiple of (tile size + margin) on each
        # axis and spans exactly one cropped tile.
        stitched_filters[
            (cropped_width + margin) * i : (cropped_width + margin) * i + cropped_width,
            (cropped_height + margin) * j : (cropped_height + margin) * j
            + cropped_height,
            :,
        ] = image

# The file name includes the current value of layer_name.
keras.utils.save_img(
    f"filters_for_layer_{layer_name}.png", stitched_filters)

generate_filter_pattern
-> 각 필터의 패턴을 생성
deprocess_image
-> 패턴을 후처리합니다. 그런 다음, 모든 필터에 대해 생성된 이미지를 all_images 리스트에 저장
stitched_filters
-> 이 배열은 각 필터 패턴을 격자 형태로 결합한 이미지를 저장하기 위해 사용

# Fetch the elephant picture from `origin`; img_path is rebound to the
# value returned by keras.utils.get_file.
img_path = keras.utils.get_file(
    fname="elephant.jpg",
    origin="https://img-datasets.s3.amazonaws.com/elephant.jpg")

def get_img_array(img_path, target_size):
    """Load an image and return it as an Xception-preprocessed batch of one.

    Args:
        img_path: Path of the image file to load.
        target_size: Size forwarded to ``keras.utils.load_img``.

    Returns:
        The image as an array with a new first axis, passed through
        ``keras.applications.xception.preprocess_input``.
    """
    loaded = keras.utils.load_img(img_path, target_size=target_size)
    # A new first axis turns the single image into a batch of one.
    batch = np.expand_dims(keras.utils.img_to_array(loaded), axis=0)
    return keras.applications.xception.preprocess_input(batch)

# Load the downloaded picture resized to 299x299.
img_array = get_img_array(img_path, target_size=(299, 299))

keras.applications.xception.preprocess_input

-> 이미지 배열을 Xception 모델의 입력 형식에 맞게 전처리
-> 이미지 배열의 픽셀 값을 전처리하여 Xception 모델이 예상하는 형식으로 맞춥니다.

# The classifier head is needed from here on: decode_predictions expects
# ImageNet class scores, and the later code looks up the "avg_pool" and
# "predictions" layers. The Xception instance created earlier in this post
# uses include_top=False and has neither, so build the full model first.
model = keras.applications.xception.Xception(weights="imagenet")
preds = model.predict(img_array)
# Print the three highest-scoring classes for the first sample.
print(keras.applications.xception.decode_predictions(preds, top=3)[0])

keras.applications.xception.decode_predictions
-> decode_predictions :가장 가능성이 높은 클래스 레이블과 해당 확률을 출력
-> top=3으로 설정되어 있으므로, 가장 가능성이 높은 상위 3개의 예측 결과가 출력

마지막 합성곱 출력을 반환하는 모델 만들기

# Name of the layer whose output is used as the last convolutional
# feature map.
last_conv_layer_name = "block14_sepconv2_act"
# Layers applied after it, in this order, to produce the predictions.
# NOTE(review): "avg_pool" and "predictions" presumably belong to the
# classifier top, so `model` must have been built with it — confirm.
classifier_layer_names = [
    "avg_pool",
    "predictions",
]
last_conv_layer = model.get_layer(last_conv_layer_name)
# Model mapping the input image to the output of last_conv_layer.
last_conv_layer_model = keras.Model(model.inputs, last_conv_layer.output)

마지막 합성곱 출력 위에 있는 분류기에 적용하기 위한 모델 만들기

# Input shaped like the last conv layer's output with its first (batch)
# axis dropped by [1:].
classifier_input = keras.Input(shape=last_conv_layer.output.shape[1:])
x = classifier_input
# Re-apply the model's own layers named in classifier_layer_names, in
# order, on top of that input. Note that this loop rebinds the
# module-level name layer_name.
for layer_name in classifier_layer_names:
    x = model.get_layer(layer_name)(x)
classifier_model = keras.Model(classifier_input, x)

last_conv_layer.output.shape[1:]
-> 마지막 컨볼루션 레이어의 출력 형태를 가져옵니다

최상위 예측 클래스의 그레이디언트 계산하기

import tensorflow as tf

with tf.GradientTape() as tape:
    last_conv_layer_output = last_conv_layer_model(img_array)
    # Watch the intermediate output explicitly so a gradient with respect
    # to it can be requested after the block.
    tape.watch(last_conv_layer_output)
    preds = classifier_model(last_conv_layer_output)
    # Index of the highest-scoring class for the first sample.
    top_pred_index = tf.argmax(preds[0])
    top_class_channel = preds[:, top_pred_index]

# Gradient of the top class score with respect to the last conv layer output.
grads = tape.gradient(top_class_channel, last_conv_layer_output)

tf.GradientTape()
-> 기울기 테이프를 생성
tape.watch(last_conv_layer_output)
-> 해당 출력에 대한 기울기를 계산할 수 있도록 설정
tape.gradient(top_class_channel, last_conv_layer_output)
-> 이를 통해 최상위 예측 클래스의 점수를 마지막 합성곱 층의 출력(특성 맵)에 대해 미분한 기울기를 얻을 수 있습니다

그레이디언트를 평균하고 채널 중요도 가중치 적용하기

# Average the gradient over axes 0, 1 and 2, leaving one value per channel.
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()
# Take the first sample of the batch as a NumPy array.
last_conv_layer_output = last_conv_layer_output.numpy()[0]
# Broadcasting along the last axis scales channel i by pooled_grads[i],
# in place, exactly like a per-channel loop would.
last_conv_layer_output *= pooled_grads
# Averaging over the channel axis gives the 2D heatmap.
heatmap = np.mean(last_conv_layer_output, axis=-1)

tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()
-> grads 텐서의 축 (0, 1, 2)를 기준으로 평균을 계산하여 넘파이 배열로 반환
last_conv_layer_output.numpy()[0]
-> 넘파이 배열로 변환한 후 첫 번째 차원의 값을 선택합니다.
heatmap = np.mean(last_conv_layer_output, axis=-1)

-> last_conv_layer_output의 마지막 차원을 평균하여 얻은 히트맵

히트맵 후처리하기

# Keep only the non-negative part of the heatmap.
heatmap = np.maximum(heatmap, 0)
# Scale into [0, 1]. If the map is all zeros after the step above, skip the
# division: 0 / 0 would fill the heatmap with NaNs.
max_value = np.max(heatmap)
if max_value > 0:
    heatmap /= max_value
plt.matshow(heatmap)
plt.show()

np.maximum(heatmap, 0)
-> 히트맵의 각 원소를 0과 비교하여 더 큰 값을 선택
-> 이는 음수 값을 제거하여 히트맵을 양수로 만들어줍니다.
heatmap /= np.max(heatmap)
-> 0과 1 사이의 범위로 조정

심준보

밑거름이라고생각합니다

이전 포스트

Deeplearning - chap 9-2

다음 포스트