Study notes from the lecture series '모두를 위한 딥러닝 시즌 2' (Deep Learning for Everyone, Season 2). https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html
Online session, 24 May, 2023
10-5 Advanced CNN (VGG)
LeNet-5: 5x5 conv filters with stride 1, followed by subsampling (pooling) layers
AlexNet: conv1 uses 96 filters of size 11x11 with stride 4, followed by pool1
GoogLeNet: 22 layers; applies 1x1 convolutions in parallel branches (the inception module)
VGGNet: up to 19 layers; every conv is 3x3 with padding 1 and stride 1; studied how performance changes with depth
ResNet: 152 layers; shortcut connections skip over layers, so a block's output is added back to its input, effectively merging the two paths
⇒ These are all image-classification architectures, although they are sometimes used for text classification as well.
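All of these architectures ship with torchvision, so they can be instantiated directly for comparison. A minimal sketch, assuming a recent torchvision (0.13+) where the weights= argument replaced the older pretrained= flag:

import torch
import torchvision.models as models

# Instantiate a few of the architectures above with randomly initialized weights
# (pass weights='IMAGENET1K_V1', or pretrained=True on older versions, to load ImageNet weights).
alexnet = models.alexnet(weights=None)
vgg19 = models.vgg19(weights=None)
resnet152 = models.resnet152(weights=None)

# All of them expect ImageNet-style 3x224x224 inputs and output 1000 class scores.
x = torch.randn(1, 3, 224, 224)
print(vgg19(x).shape)      # torch.Size([1, 1000])
print(resnet152(x).shape)  # torch.Size([1, 1000])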
VGG16
- Variants from vgg11 up to vgg19 can be built from the same code
torchvision.models.vgg: assumes a 3x224x224 input
import torchvision.models.vgg as vgg
class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features  # convolution layers
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),  # must be adjusted if the input image size changes
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )  # FC layers 1~3
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)       # convolution
        x = self.avgpool(x)        # average pooling
        x = x.view(x.size(0), -1)  # flatten
        x = self.classifier(x)     # FC layers
        return x

    def _initialize_weights(self):
        for m in self.modules():  # iterate over every submodule
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # He initialization
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
## Building the features module
# e.g. 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v  # the in-channel count changes for the next conv
    return nn.Sequential(*layers)
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # 8 conv + 3 FC = 11 → vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # 10 conv + 3 FC = 13 → vgg13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],  # 13 conv + 3 FC = 16 → vgg16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],  # 16 conv + 3 FC = 19 → vgg19
    'custom': [64, 64, 64, 'M', 128, 128, 128, 'M', 256, 256, 256, 'M']
}
conv = make_layers(cfg['custom'], batch_norm=True)
CNN = VGG(make_layers(cfg['custom']), num_classes=10, init_weights=True)
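As a sanity check of the pieces above, a standard config such as 'D' runs end to end on the assumed 3x224x224 input; note that the 'custom' config ends at 256 channels, so the classifier's first Linear layer would need the adjustment mentioned in the comment. A minimal sketch:

import torch

model = VGG(make_layers(cfg['D'], batch_norm=True), num_classes=1000, init_weights=True)  # vgg16 with batch norm
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 1000])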
- Where does the number after "vgg" come from? It is the number of conv layers plus the number of FC layers in the classifier, as the count below shows.
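A quick way to verify this is to count the conv entries (the integers) in each config and add the 3 FC layers of the classifier; a small sketch using the cfg dict above:

for name, layer_cfg in cfg.items():
    n_conv = sum(1 for v in layer_cfg if v != 'M')  # 'M' entries are pooling, not weight layers
    print(name, ':', n_conv, 'conv + 3 FC =', n_conv + 3)
# A: 8 + 3 = 11, B: 10 + 3 = 13, D: 13 + 3 = 16, E: 16 + 3 = 19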
with CIFAR-10
- The learning rate can be decayed as the epochs progress
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)  # the lr is multiplied by 0.9 every 5 epochs
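The effect of StepLR can be checked in isolation by stepping a throwaway optimizer and printing the learning rate; a sketch where the dummy model and the initial lr of 0.005 are placeholders (not the training values), assuming a PyTorch recent enough to have get_last_lr():

import torch
import torch.optim as optim

dummy_model = torch.nn.Linear(10, 10)                      # placeholder model just to build an optimizer
dummy_opt = optim.SGD(dummy_model.parameters(), lr=0.005)  # placeholder initial lr
dummy_sche = optim.lr_scheduler.StepLR(dummy_opt, step_size=5, gamma=0.9)

for epoch in range(15):
    dummy_opt.step()   # in recent PyTorch, optimizer.step() should come before scheduler.step()
    dummy_sche.step()
    print(epoch, dummy_sche.get_last_lr())  # the lr is multiplied by 0.9 after every 5th epoch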
# training
epochs = 50
for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    lr_sche.step()  # note: recent PyTorch versions expect scheduler.step() after optimizer.step(), i.e. at the end of the epoch
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 30 == 29:  # print every 30 mini-batches
            loss_tracker(loss_plt, torch.Tensor([running_loss / 30]), torch.Tensor([i + epoch * len(trainloader)]))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 30))
            running_loss = 0.0
10-6 Advanced CNN (ResNet)
- The identity mapping h(x) = x (the simplest form) gives the best performance, which motivates the residual/skip connection
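The skip connection in one picture: a block computes a residual F(x) and adds the input back, so if F(x) is driven to zero the block reduces to the identity h(x) = x. A toy sketch (ToyResidual is a made-up name, not part of torchvision):

import torch
import torch.nn as nn

class ToyResidual(nn.Module):
    def __init__(self, channels):
        super().__init__()
        # F(x): a small conv stack whose output has the same shape as the input
        self.f = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
        )

    def forward(self, x):
        return torch.relu(self.f(x) + x)  # skip connection: add the identity back

x = torch.randn(1, 16, 8, 8)
print(ToyResidual(16)(x).shape)  # torch.Size([1, 16, 8, 8])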
torchvision.models.resnet: assumes a 3x224x224 input
import torchvision.models.resnet as resnet

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)    # 3x3, stride = stride (2 when the block downsamples)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)  # 3x3, stride = 1
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)
            # Without downsample, a stride >= 2 (or a channel change) makes out and identity
            # differ in shape, so the addition below would fail; downsample reshapes the
            # identity so the two can be added.

        out += identity
        out = self.relu(out)

        return out
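To see when downsample is actually needed, compare a stride-1 block with a stride-2, channel-doubling block; the 1x1-conv-plus-BatchNorm downsample below mirrors what _make_layer builds later:

import torch
import torch.nn as nn

x = torch.randn(1, 64, 56, 56)

# stride 1, same channel count: the identity already matches, no downsample needed
block1 = BasicBlock(64, 64)
print(block1(x).shape)  # torch.Size([1, 64, 56, 56])

# stride 2 and more channels: the identity must be reshaped to match the residual branch
down = nn.Sequential(
    conv1x1(64, 128, stride=2),
    nn.BatchNorm2d(128),
)
block2 = BasicBlock(64, 128, stride=2, downsample=down)
print(block2(x).shape)  # torch.Size([1, 128, 28, 28])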
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)                 # e.g. conv1x1(64, 64)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)           # e.g. conv3x3(64, 64)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)  # e.g. conv1x1(64, 256): planes * expansion widens 64 -> 256
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)    # 1x1, stride = 1
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)  # 3x3, stride = stride
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)  # 1x1, stride = 1
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
class ResNet(nn.Module):
    # model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)  # resnet50
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])             # layers[0] = 3 for resnet50
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)  # layers[1] = 4
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)  # layers[2] = 6
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)  # layers[3] = 3

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 (the paper that gives the evidence).
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    # the inline comments below use layer2 of resnet50 as the example
    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # in ResNet, downsample is used to match the channel count (and spatial size) of the identity
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),  # conv1x1(256, 512, 2)
                nn.BatchNorm2d(planes * block.expansion),                  # BatchNorm2d(512)
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion  # self.inplanes = 128 * 4
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))  # the remaining 3 blocks of layer2
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
def resnet18(pretrained=False, **kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)  # 2*(2+2+2+2) basic-block convs + 1 (conv1) + 1 (fc) = 16 + 2 = 18 → resnet18
    return model

def resnet50(pretrained=False, **kwargs):
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)  # 3*(3+4+6+3) bottleneck convs + 1 (conv1) + 1 (fc) = 48 + 2 = 50 → resnet50
    return model

def resnet152(pretrained=False, **kwargs):
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)  # 3*(3+8+36+3) + 2 = 150 + 2 = 152 → resnet152
    return model
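A quick shape check of the factories above on a dummy ImageNet-sized input; note that the nominal depth (18/50/152) counts only conv1, the block convs, and the final fc, not the 1x1 downsample projections:

import torch

model = resnet50(num_classes=1000)
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 1000])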
with CIFAR-10
- For a more accurate Normalize, compute the dataset mean and std directly
transform = transforms.Compose([
    transforms.ToTensor()
])
trainset = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=transform)

print(trainset.data.shape)  # newer torchvision exposes the raw array as .data (older versions used .train_data)
train_data_mean = trainset.data.mean(axis=(0, 1, 2))
train_data_std = trainset.data.std(axis=(0, 1, 2))
print(train_data_mean)
print(train_data_std)

train_data_mean = train_data_mean / 255
train_data_std = train_data_std / 255
print(train_data_mean)
print(train_data_std)
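The computed mean and std are then plugged into transforms.Normalize when building the actual loaders; a sketch where the crop/flip augmentation and the batch size 256 are assumptions, not values taken from the notes above:

import torch
import torchvision
import torchvision.transforms as transforms

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),   # assumed augmentation
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(train_data_mean, train_data_std),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(train_data_mean, train_data_std),
])

trainset = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=transform_test)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)   # assumed batch size
testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=2)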
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 16, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 128, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        # x.shape = [1, 16, 32, 32]
        x = self.bn1(x)
        x = self.relu(x)
        # x = self.maxpool(x)

        x = self.layer1(x)
        # x.shape = [1, 64, 32, 32] with Bottleneck (expansion 4)
        x = self.layer2(x)
        # x.shape = [1, 128, 32, 32]
        x = self.layer3(x)
        # x.shape = [1, 256, 16, 16]
        x = self.layer4(x)
        # x.shape = [1, 512, 8, 8]

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
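A minimal shape check of this CIFAR-10 variant, built with Bottleneck blocks in the resnet50 layout; the name resnet50 is reused for the instance because that is what the training loop below calls, and the device line is the usual cuda/cpu selection assumed to exist elsewhere in the notebook:

import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10, zero_init_residual=True).to(device)

x = torch.randn(1, 3, 32, 32).to(device)
print(resnet50(x).shape)  # torch.Size([1, 10])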
# training
epochs = 70
for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    lr_sche.step()
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 30 == 29:  # print every 30 mini-batches
            value_tracker(loss_plt, torch.Tensor([running_loss / 30]), torch.Tensor([i + epoch * len(trainloader)]))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 30))
            running_loss = 0.0

    ## Check Accuracy
    # acc = acc_check(resnet50, testloader, epoch, save=1)
    # value_tracker(acc_plt, torch.Tensor([acc]), torch.Tensor([epoch]))
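acc_check and value_tracker are helper functions from the lecture notebook that are not defined in these notes; a rough sketch of what the accuracy check could look like (the checkpoint path is a made-up placeholder):

def acc_check(net, test_set_loader, epoch, save=1):
    correct = 0
    total = 0
    net.eval()  # switch off dropout / use running BN stats while evaluating
    with torch.no_grad():
        for images, labels in test_set_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    net.train()
    acc = 100 * correct / total
    print('Accuracy of the network on the test images: %.2f %%' % acc)
    if save:
        torch.save(net.state_dict(), './model/model_epoch_{}_acc_{}.pth'.format(epoch, int(acc)))  # placeholder path
    return acc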
In-person session, 27 May, 2023