translation invariance
- input shift에 대해 output이 영향을 받지 않아야 한다.
고양이가 좌측 상단에 있든 우측 하단에 있든 고양이로 예측해야 한다는 뜻이다.
코드 블록을 제시해줘서 구현이 쉬울 것 같다.
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
class convBlock(nn.Module):
    """Convolution followed by batch normalization and a ReLU activation.

    Args:
        in_c: Number of input channels given to ``nn.Conv2d``.
        out_c: Number of output channels; also the ``nn.BatchNorm2d`` width.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (for example
            ``kernel_size``, ``stride``, ``padding``).
    """

    def __init__(self, in_c, out_c, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_c, out_c, **kwargs)
        self.bn = nn.BatchNorm2d(out_c)
        # Conv + BatchNorm alone is an affine map; stacking such blocks adds
        # no representational power.  ReLU supplies the non-linearity and has
        # no parameters, so existing state_dict keys are unaffected.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``; every output element is >= 0."""
        x = self.conv(x)
        x = self.bn(x)
        return self.relu(x)
class inception(nn.Module):
    """Inception module: four parallel branches concatenated on dim 1.

    Branches, all fed the same input:
        * ``conv1`` - 1x1 convolution producing ``n1x1`` channels.
        * ``conv2`` - 1x1 reduction to ``n3x3_re`` channels, then a 3x3
          convolution (padding 1) producing ``n3x3`` channels.
        * ``conv3`` - 1x1 reduction to ``n5x5_re`` channels, then a 5x5
          convolution (padding 2) producing ``n5x5`` channels.
        * ``conv4`` - 3x3 max pool (stride 1, padding 1), then a 1x1
          convolution producing ``pool_proj`` channels.

    Every branch uses stride 1 with size-preserving padding, so the outputs
    share spatial dimensions and the concatenated result has
    ``n1x1 + n3x3 + n5x5 + pool_proj`` channels.
    """

    def __init__(self, in_c, n1x1, n3x3_re, n3x3, n5x5_re, n5x5, pool_proj):
        super().__init__()

        # Settings shared by the 1x1 convolutions of the first three branches.
        pointwise = dict(kernel_size=1, stride=1, padding=0)

        self.conv1 = convBlock(in_c, n1x1, **pointwise)

        reduce_3x3 = convBlock(in_c, n3x3_re, **pointwise)
        expand_3x3 = convBlock(n3x3_re, n3x3, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Sequential(reduce_3x3, expand_3x3)

        reduce_5x5 = convBlock(in_c, n5x5_re, **pointwise)
        expand_5x5 = convBlock(n5x5_re, n5x5, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Sequential(reduce_5x5, expand_5x5)

        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        project = convBlock(in_c, pool_proj, kernel_size=1, padding=0)
        self.conv4 = nn.Sequential(pool, project)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them along dim 1."""
        branches = (self.conv1, self.conv2, self.conv3, self.conv4)
        return torch.cat([branch(x) for branch in branches], dim=1)
class GoogleNet(nn.Module):
    """GoogLeNet-style classifier assembled from ``convBlock`` and ``inception``.

    The hard-coded flatten widths (1024 for the main head, 2048 for the
    auxiliary heads) match 3x224x224 inputs; other input sizes will most
    likely fail in the ``Linear`` layers.

    ``forward`` returns:
        * training mode (``self.training`` is True, the default after
          construction): a tuple ``(clf1, clf2)`` of auxiliary-head logits,
          each of shape ``(batch, 1000)``.
        * eval mode (after ``model.eval()``): softmax probabilities of shape
          ``(batch, 1000)`` from the main head.

    Notes:
        * ``linear2`` is shared by both auxiliary heads, and ``linear`` is
          shared by the auxiliary heads and the main head.
        * NOTE(review): training mode does not return the main-head output,
          so a loss built from the returned values never supervises the main
          head directly. The return arity is kept for existing callers;
          consider returning ``(x, clf1, clf2)`` once callers are updated.
    """

    def __init__(self,):
        super().__init__()
        self.bottom = nn.Sequential(
            convBlock(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(3, 2, 1),
            convBlock(64, 64, kernel_size=1, stride=1),
            convBlock(64, 192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(3, 2, 1),
        )
        # Do NOT assign ``self.train``: it would shadow ``nn.Module.train``
        # and make ``model.train()`` / ``model.eval()`` raise TypeError.
        # The mode is tracked by the built-in ``self.training`` flag instead.
        self.auxclf = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            convBlock(512, 128, kernel_size=1, stride=1),
        )
        self.auxclf2 = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            convBlock(528, 128, kernel_size=1, stride=1),
        )
        self.a3 = inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = inception(256, 128, 128, 192, 32, 96, 64)
        self.a4 = inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = inception(528, 256, 160, 320, 32, 128, 128)
        self.a5 = inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = inception(832, 384, 192, 384, 48, 128, 128)
        self.pool1 = nn.MaxPool2d(3, 2, 1)
        # Not used in forward(); kept so the module's attributes are unchanged.
        self.pool2 = nn.MaxPool2d(3, 2)
        self.avg = nn.AvgPool2d(kernel_size=7, stride=1)
        self.linear = nn.Linear(1024, 1000)
        self.linear2 = nn.Linear(2048, 1024)
        # Outputs are (batch, classes), so normalise over the class axis.
        self.softmax = nn.Softmax(dim=1)

    def _aux_logits(self, head, feats):
        """Run auxiliary ``head`` on ``feats`` and return (batch, 1000) logits."""
        feats = head(feats)
        # Flatten per sample; a bare view(-1) would fold the batch dimension
        # into the feature vector and break for batch sizes other than 1.
        feats = self.linear2(feats.view(feats.size(0), -1))
        return self.linear(feats)

    def forward(self, x):
        """Classify a batch of images; see the class docstring for the outputs."""
        x = self.bottom(x)
        x = self.a3(x)
        x = self.b3(x)
        x = self.pool1(x)
        x = self.a4(x)
        # The auxiliary heads are only returned in training mode, so they are
        # skipped entirely during evaluation.
        clf1 = self._aux_logits(self.auxclf, x) if self.training else None
        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        clf2 = self._aux_logits(self.auxclf2, x) if self.training else None
        x = self.e4(x)
        x = self.pool1(x)
        x = self.a5(x)
        x = self.b5(x)
        x = self.avg(x)
        x = self.linear(x.view(x.size(0), -1))
        x = self.softmax(x)
        if self.training:
            return clf1, clf2
        return x
https://roytravel.tistory.com/338
위 블로그를 많이 참고했다.
다음 모델은 GAN(2014) / citation : 50000+ 이다