DEV Community: 4rldur0

댑덥딥 9주차 정리

4rldur0 — Thu, 28 Dec 2023 11:23:29 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 7 June, 2023

11-4 RNN time series

time series data? =serial data, 일정한 시간 간격으로 배치된 데이터 ex) 주가 데이터

apply RNN: many-to-one

hidden state에 충분한 dimension을 주고 마지막 output에 fc layer를 연결

# Scaling: convert each column to a relative value between 0 and 1,
# which makes the values easier for the network to handle.
def minmax_scaler(data):
    """Min-max scale `data` along axis 0.

    Each column has its minimum subtracted and is divided by its range;
    1e-7 is added to the range so a constant column does not divide by zero.
    """
    column_min = np.min(data, 0)
    column_range = np.max(data, 0) - column_min
    return (data - column_min) / (column_range + 1e-7)

# Neural net setting
class Net(torch.nn.Module):
    """Many-to-one LSTM: read a whole sequence, predict from the last step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        output_dim: size of the final prediction.
        layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, layers):
        super().__init__()
        self.rnn = torch.nn.LSTM(
            input_dim, hidden_dim, num_layers=layers, batch_first=True
        )
        self.fc = torch.nn.Linear(hidden_dim, output_dim, bias=True)

    def forward(self, x):
        sequence_out, _ = self.rnn(x)
        # batch_first=True, so index -1 on dim 1 selects the last time step
        last_step = sequence_out[:, -1]
        return self.fc(last_step)

net = Net(data_dim, hidden_dim, output_dim, 1)

11-5 RNN Seq2seq

Seq2seq model

? sequence를 입력받고 sequence를 출력. 번역/챗봇 분야에서 많이 활용

RNN은 문장이 끝나기 전 답변을 만듦. 끝까지 듣고 답하기 위해 seq2seq 모델이 나옴

encoder와 decoder이라는 두 개의 RNN으로 구성됨

encoder: input을 벡터의 형태로 압축하여 decoder에 전달

decoder: 첫 셀에 encoder가 전달한 벡터를 hidden state로 받고, start token을 첫 입력으로 하여 출력 sequence를 한 단어씩 생성

# 전체적인 흐름

# Length limits handed to preprocess() and (for the target) to evaluate().
SOURCE_MAX_LENGTH = 10
TARGET_MAX_LENGTH = 12
# data setting: preprocess() returns the sentence pairs plus one vocabulary
# object per side (presumably Vocab instances - confirm against preprocess)
load_pairs, load_source_vocab, load_target_vocab = preprocess(raw, SOURCE_MAX_LENGTH, TARGET_MAX_LENGTH)
# show one random pair as a sanity check
print(random.choice(load_pairs))

# define encoder, decoder
enc_hidden_size = 16
# the decoder starts from the encoder's hidden state, so both sizes must match
dec_hidden_size = enc_hidden_size
enc = Encoder(load_source_vocab.n_vocab, enc_hidden_size)
dec = Decoder(dec_hidden_size, load_target_vocab.n_vocab)

# 5000 training iterations, reporting the average loss every 1000
train(load_pairs, load_source_vocab, load_target_vocab, enc, dec, 5000, print_every=1000)
evaluate(load_pairs, load_source_vocab, load_target_vocab, enc, dec, TARGET_MAX_LENGTH)

# data setting - convert a sentence to a column tensor of vocabulary indices
def tensorize(vocab, sentence):
    """Convert a space-separated sentence to a (length + 1, 1) index tensor.

    Every word is looked up in `vocab.vocab2index` and the "<EOS>" index is
    appended. The result holds integer indices (not one-hot vectors) and is
    created on the module-level `device`.

    Raises:
        KeyError: if a word is missing from `vocab.vocab2index`.
    """
    indexes = [vocab.vocab2index[word] for word in sentence.split(" ")]
    indexes.append(vocab.vocab2index["<EOS>"])
    # build the integer tensor directly instead of going through float32
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

# fix token for "start of sentence" and "end of sentence"
SOS_token = 0
EOS_token = 1

# class for vocabulary related information of data
class Vocab:
    """Bidirectional word/index mapping with per-word occurrence counts.

    Attributes (set in __init__):
        vocab2index: word -> index, pre-filled with "<SOS>" and "<EOS>".
        index2vocab: index -> word, the inverse mapping.
        vocab_count: word -> number of times add_vocab() has seen it.
        n_vocab: number of known words, also the next free index.
    """

    def __init__(self):
        self.vocab2index = {"<SOS>": SOS_token, "<EOS>": EOS_token}
        self.index2vocab = {SOS_token: "<SOS>", EOS_token: "<EOS>"}
        self.vocab_count = {}
        self.n_vocab = len(self.vocab2index)

    def add_vocab(self, sentence):
        """Register every space-separated word of `sentence`."""
        for word in sentence.split(" "):
            if word not in self.vocab2index:
                self.vocab2index[word] = self.n_vocab
                self.index2vocab[self.n_vocab] = word
                self.n_vocab += 1
            # The special tokens are in vocab2index from the start but have
            # no vocab_count entry, so count with a default instead of
            # assuming the key exists (previously a KeyError).
            self.vocab_count[word] = self.vocab_count.get(word, 0) + 1

# declare simple encoder
class Encoder(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call.

    Args:
        input_size: size of the source vocabulary.
        hidden_size: embedding width and GRU hidden size.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, x, hidden):
        # reshape the embedded token to (seq_len=1, batch=1, hidden_size)
        step_input = self.embedding(x).view(1, 1, -1)
        step_output, next_hidden = self.gru(step_input, hidden)
        return step_output, next_hidden

# declare simple decoder
class Decoder(nn.Module):
    """Single-layer GRU decoder that emits log-probabilities for one step.

    Args:
        hidden_size: embedding width and GRU hidden size.
        output_size: size of the target vocabulary.
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        # project the hidden vector back to one score per target word
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        step_input = self.embedding(x).view(1, 1, -1)
        gru_output, next_hidden = self.gru(step_input, hidden)
        # gru_output[0] has shape (1, hidden_size): drop the seq_len axis
        scores = self.out(gru_output[0])
        return self.softmax(scores), next_hidden

nn.Embedding: 단어 index를 받아 embedding matrix(vocab 크기 x hidden 크기)에서 해당 행을 꺼내는 연산. one-hot vector를 matrix에 곱하는 것과 같은 결과이며, 거대한 one-hot 표현을 hidden 크기의 작은 vector로 변환함
GRU: LSTM과 같은 advanced RNN model.

LSTM: RNN보다 성능이 좋다고 알려져 있다.(gradient vanishing문제 일부 해소)

GRU: LSTM보다 빠르다고 알려져 있다.

Q. encoder에서 embedding 했는데 decoder에서 왜 또 embedding 하지? 이미 압축된 형태로 들어오는 거 아닌가?
A. decoder의 입력(x)은 압축된 벡터가 아니라 target 단어의 index이기 때문. 압축된 벡터는 hidden state로 따로 전달되고, embedding은 단어 index를 gru input 사이즈의 벡터로 바꿔주기 위해서 필요함

# training seq2seq
def train(pairs, source_vocab, target_vocab, encoder, decoder, n_iter, print_every=1000, learning_rate=0.01):
    """Train the encoder/decoder pair for `n_iter` single-sentence steps.

    Each step encodes one randomly sampled source sentence token by token,
    hands the final encoder hidden state to the decoder, and decodes with
    teacher forcing (the gold token is always the next decoder input).
    The per-token loss, averaged over `print_every` steps, is printed.
    """
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    # sample all training pairs up front, then tensorize sources and targets
    sampled_pairs = [random.choice(pairs) for _ in range(n_iter)]
    sources = [tensorize(source_vocab, sample[0]) for sample in sampled_pairs]
    targets = [tensorize(target_vocab, sample[1]) for sample in sampled_pairs]

    # the decoder outputs log-probabilities, so NLLLoss is used
    # (CrossEntropyLoss on raw scores is the usual alternative)
    criterion = nn.NLLLoss()
    running_loss = 0

    for step, (source_tensor, target_tensor) in enumerate(zip(sources, targets), start=1):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        hidden = torch.zeros([1, 1, encoder.hidden_size]).to(device)
        for source_token in source_tensor:
            _, hidden = encoder(source_token, hidden)

        # the first decoder input is the start token; the decoder continues
        # from the encoder's final hidden state
        decoder_input = torch.Tensor([[SOS_token]]).long().to(device)
        loss = 0
        for gold_token in target_tensor:
            decoder_output, hidden = decoder(decoder_input, hidden)
            loss += criterion(decoder_output, gold_token)
            decoder_input = gold_token  # teacher forcing

        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

        running_loss += loss.item() / target_tensor.size(0)

        if step % print_every == 0:
            loss_avg = running_loss / print_every
            running_loss = 0
            print("[{} - {}%] loss = {:05.4f}".format(step, step / n_iter * 100, loss_avg))

nn.NLLLoss: negative log-likelihood 손실을 구하는 함수이고, 분류문제에서 모델의 출력이 log 확률값(LogSoftmax의 출력)일 때 사용. CrossEntropyLoss = LogSoftmax + NLLLoss
teacher forcing: 실제 정답을 다음 셀에 넣어주는 것.

-일부는 teacher forcing, 일부는 gru의 예측값을 다음 셀에 전달하는 방법을 사용할 수도 있음

11-6 RNN PackedSequence

sequential data는 길이가 정해져 있지 않음 → 하나의 tensor로 묶어서 학습시켜야 함

cf) 이미지는 fixed size(예를 들어 32x32)

padding method

? 가장 긴 sequence의 길이에 맞추어 나머지 sequence의 뒤를 pad라는 토큰으로 채움

packing method

? sequence 길이의 정보를 저장하여 사용. batch는 길이 내림차순으로 정렬되어야 함. padding method에 비해 효율적이고 pad token을 사용하지 않아도 됨.

대면 8 April, 2023

RNN-timeseries, seq2seq, packedsequence

colab.research.google.com

댑덥딥 8주차 정리

4rldur0 — Thu, 28 Dec 2023 11:16:30 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 31 May, 2023

10-7 next step of CNN

CV 관련 앞으로 할만한 것들

classification: 분류

-DenseNet, SENet, MobileNet, SqueezeNet, AutoML(NAS, NASNet)

object detection: 탐지

-Latest Object Detection을 검색

hoya012 / deep_learning_object_detection

A paper list of object detection using deep learning.

deep learning object detection

A paper list of object detection using deep learning. I wrote this page with reference to this survey paper and searching and searching..

Last updated: 2020/09/22

Update log

2018/9/18 - update all of recent papers and make some diagram about history of object detection using deep learning 2018/9/26 - update codes of papers. (official and unofficial)
2018/october - update 5 papers and performance table.
2018/november - update 9 papers.
2018/december - update 8 papers and and performance table and add new diagram(2019 version!!).
2019/january - update 4 papers and and add commonly used datasets.
2019/february - update 3 papers.
2019/march - update figure and code links.
2019/april - remove author's names and update ICLR 2019 & CVPR 2019 papers.
2019/may - update CVPR 2019 papers.
2019/june - update CVPR 2019 papers and dataset paper.
2019/july - update BMVC 2019 papers and some of ICCV…

View on GitHub

object tracking: 프레임 간의 연관관계

-MDNet, GOTURN, CFNet, ROLO, Tracking the Untrackable

segmentation: 배경과 객체 분할

-FCN, U-Net, Mask RCNN

Image Captioning

Super Resolution

Generative Model(AutoEncoder, GAN)

OpenPose

[ ] Pytorch가 가지고 있는 다양한 기능들 직접 사용해보기!
[ ] Custom DataSet 만드는 방법 익히기
[ ] Pytorch가 제공하지 않는 데이터셋을 다운받아서 학습해보기 (Tiny-ImageNet Challenge 추천!)

11-0 RNN intro

RNN? sequential data를 다루기 위해 고안됨. ex) word, sentence, time series

sequential data? 순서가 중요한 data. ex) “hello”를 “elhol”로 바꾸면 알아듣지 못 함

-RNN 이전에도 position index를 활용하여 sequential data를 처리할 수 있었음. 다만, 복잡한 구조 파악 어려웠음

: A라는 셀에 t번째 입력값이 들어오면, t번째 출력값이 나오고, 동시에 다른 출력값이 셀 A로 들어감. 이때 다른 출력값을 hidden state라고 하며, 다음 셀로 전달됨. 따라서 t+1번째의 출력값은 t번째의 영향을 받음. 다음 셀은 사실 원래의 셀과 같은 것임(=모든 셀 A는 같음. 모두 같은 함수(파라미터)를 공유하기 때문)

Ht는 기본적으로 w를 인자로 갖는 함수 연산에 의해 나옴. Ht-1은 hidden state이고, y는 Ht와 같음.

ex) (vanilla) recurrent neural network

character-level language model example

Usages of RNN

ex)

1) 이미지→이미지에 대한 설명

2) 문장 → 감정 레이블

3) 문장 → 문장 (문장이 다 끝난 시점부터 다음 문장이 나옴)

4) 비디오 → 비디오

multilayered RNN

RNN applications

cf) advanced model examples

-LSTM(http://colah.github.io/posts/2015-08-Understanding-LSTMs/)

-GRU

11-1 RNN basics

RNN in PyTorch

rnn = torch.nn.RNN(input_size, hidden_size)
outputs, _status = rnn(input_data)

input/output shape: dimension 3짜리 torch tensor (아래 shape는 batch_first=True 기준. 기본값인 batch_first=False에서는 sequence length가 첫 번째 차원)

input shape = (batch size, sequence length, input_size)

output shape = (batch size, sequence length, hidden_size)

-batch size, sequence length는 pytorch가 input data에 따라 자동으로 판별함

example_hello를 입력 받았을 때, ello-를 출력받기

import torch
import numpy as np

input_size = 4
hidden_size = 2

# 1-hot encoding of the four characters (vector length == input_size == 4)
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

# shape (3, 5, 4): batch size = 3, sequence length = 5, input_size = 4
input_data_np = np.array([[h, e, l, l, o],
                          [e, o, l, l, l],
                          [l, l, e, e, l]], dtype=np.float32)

# transform as torch tensor
input_data = torch.Tensor(input_data_np)

# batch_first=True makes the RNN read the input as (batch, seq, feature).
# With the default (batch_first=False) the first axis would be taken as the
# sequence axis, i.e. 3 time steps of a 5-sample batch instead of 3 sequences.
rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)
outputs, _status = rnn(input_data)

11-2 RNN hihello and charseq

Hihello problem

h가 들어오면 i를, i가 들어오면 h를, h가 들어오면 e를, … 예측하는 문제

-위의 example과 비슷

## prepare data for hihello problem
char_set = list('hielo')
# hyper parameters: input and hidden sizes both equal the alphabet size
input_size = hidden_size = len(char_set)
learning_rate = 0.1
# data setting: indices into char_set for the input "hihell"
x_data = [[0, 1, 0, 2, 3, 3]]
# one-hot rows generated from the indices instead of being typed out
x_one_hot = [
    [[int(column == index) for column in range(input_size)] for index in sequence]
    for sequence in x_data
]
# target "ihello": the input shifted one character to the left
y_data = [[1, 0, 2, 3, 3, 4]]
# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

charseq

hihello problem에서 입력을 일반화한 것

sample = " if you want you"
# make dictionary
# sorted() gives the characters a stable order; list(set(...)) can differ
# from run to run, which changes every index and one-hot position.
char_set = sorted(set(sample))
char_dic = {c: i for i, c in enumerate(char_set)}
# hyper parameters
dic_size = len(char_dic)    # = input_size of the RNN
hidden_size = len(char_dic)
learning_rate = 0.1
# data setting: input is every character but the last, target is the
# same string shifted by one character
sample_idx = [char_dic[c] for c in sample]
x_data = [sample_idx[:-1]]
x_one_hot = [np.eye(dic_size)[x] for x in x_data]
y_data = [sample_idx[1:]]
# transform as torch tensor variable
# stack into one ndarray first: building a tensor from a list of ndarrays
# is slow and triggers a PyTorch warning
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
Y = torch.LongTensor(y_data)

# declare RNN
# NOTE(review): this uses `input_size`, which the hihello snippet defines;
# the charseq snippet names the same quantity `dic_size` - confirm which
# data-setting snippet is meant to run before this one.
rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True) # batch_first guarantees the order of output = (B, S, F)
# loss & optimizer setting
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), learning_rate)

# start training
for i in range(100):
    optimizer.zero_grad()
    outputs, _status = rnn(X)
    # flatten batch and sequence so each time step is one classification
    # sample; view(-1, input_size) fits because the data snippets set
    # hidden_size equal to the input size
    loss = criterion(outputs.view(-1, input_size), Y.view(-1))
    loss.backward()
    optimizer.step()
    # index of the highest score at every time step = predicted character
    result = outputs.data.numpy().argmax(axis=2)
    result_str = ''.join([char_set[c] for c in np.squeeze(result)])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

11-3 RNN Long sequence

longseq

-longer dataset을 특정 사이즈로 잘라 사용(=특정 사이즈의 윈도우를 하나씩 옆으로 옮기며 자름)→자른 데이터를 인덱스로 저장→one-hot encoding→torch tensor화

ex)

# data setting: slide a window over the sentence one character at a time
x_data = []
y_data = []
for i in range(len(sentence) - sequence_length):
    # one window of sequence_length + 1 characters yields both strings:
    # the input drops the last character, the target drops the first
    window = sentence[i:i + sequence_length + 1]
    x_str = window[:-1]
    y_str = window[1:]
    print(i, x_str, '->', y_str)
    x_data.append([char_dic[ch] for ch in x_str])  # x str to index
    y_data.append([char_dic[ch] for ch in y_str])  # y str to index

# one-hot encode every input window by row-indexing an identity matrix
x_one_hot = [np.eye(dic_size)[x] for x in x_data]

# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

# declare RNN + FC
# (adding FC and stacking RNN)
class Net(torch.nn.Module):
    """Stacked RNN followed by a fully connected layer on every time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: RNN hidden size; also the input and output width of fc.
        layers: number of stacked RNN layers.
    """

    def __init__(self, input_dim, hidden_dim, layers):
        super().__init__()
        self.rnn = torch.nn.RNN(
            input_dim, hidden_dim, num_layers=layers, batch_first=True
        )
        self.fc = torch.nn.Linear(hidden_dim, hidden_dim, bias=True)

    def forward(self, x):
        rnn_out, _ = self.rnn(x)
        return self.fc(rnn_out)
net = Net(dic_size, hidden_size, 2)

# loss & optimizer setting
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)
# start training
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    # flatten (window, time step) into one axis so every character position
    # is one classification sample over dic_size classes
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # predicted character index for every position of every window
    results = outputs.argmax(dim=2)
    predict_str = ""
    for j, result in enumerate(results):
        # print(i, j, ''.join([char_set[t] for t in result]), loss.item())
        if j == 0:
            # the first window contributes all of its characters
            predict_str += ''.join([char_set[t] for t in result])
        else:
            # later windows overlap the previous one, so only their last
            # character is new
            predict_str += char_set[result[-1]]

    print(predict_str)

대면 8 April, 2023

gradient descent and linear regression

Q. train run을 여러번 돌릴 때, data 쪽 셀을 run하지 않고 돌리면 그 전 런타임에서 발생한 오버피팅이 그다음 런타임 시작부터 누적되어 발생하는 느낌? 왜 그런거지?

Q. train loss 왜 증가?
A. learning_rate 너무 컸음

colab.research.google.com

댑덥딥 7주차 정리

4rldur0 — Thu, 28 Dec 2023 11:03:54 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 24 May, 2023

10-5 Advanced CNN(VGG)

LeNet-5: conv filter 5x5, stride 1/subsampling pool

AlexNet: conv1: 96 11x11, stride 4/pool1

GoogLeNet: 22layers. 1x1 conv를 병렬적으로 사용-inception module

VGGNet: 19 layers, 모든 conv 3x3, padding 1, stride1_깊이에 따른 성능 변화를 연구

ResNet: 152 layers-layer를 건너뛰듯이 → 두 개의 레이어가 합쳐지는 효과를 냄

⇒ 이미지 분류용 알고리즘들임. 텍스트 분류에 사용되기도 함

VGG16

-vgg11~vgg19까지 만들 수 있음

torchvision.models.vgg: input 3x224x224 기준

import torchvision.models.vgg as vgg  # module path is "models" (was misspelled "meodels")

class VGG(nn.Module):
    """VGG-style classifier: conv features -> 7x7 average pool -> 3 FC layers.

    Args:
        features: module that produces the convolutional feature map.
        num_classes: size of the final output layer.
        init_weights: when true, run _initialize_weights() after construction.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super().__init__()

        # convolutional part, supplied by the caller
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # FC layers 1-3. The first Linear assumes 512 channels after the
        # 7x7 pooling and must be changed if `features` ends differently.
        fc_layers = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_layers)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        feature_map = self.features(x)                # convolution
        pooled = self.avgpool(feature_map)            # adaptive average pool
        flat = pooled.view(pooled.size(0), -1)        # flatten per sample
        return self.classifier(flat)                  # FC layers

    def _initialize_weights(self):
        """Initialise Conv2d, BatchNorm2d and Linear sub-modules in place."""
        # self.modules() yields this module and every sub-module in turn
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # He (Kaiming) initialisation
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

## build the `features` module
# 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

def make_layers(cfg, batch_norm=False, in_channels=3):
    """Build the convolutional feature extractor described by `cfg`.

    Args:
        cfg: sequence where an int adds a 3x3 convolution (padding 1) with
            that many output channels followed by ReLU, and 'M' adds a
            2x2 max-pool with stride 2.
        batch_norm: insert BatchNorm2d between each convolution and ReLU.
        in_channels: channels of the input image. Defaults to 3, the value
            that used to be hard-coded; pass 1 for grayscale input.

    Returns:
        nn.Sequential holding the layers in order.
    """
    layers = []

    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(v))
        layers.append(nn.ReLU(inplace=True))
        # the next convolution consumes what this one produced
        in_channels = v

    return nn.Sequential(*layers)

# Layer configurations for make_layers(): an int is a conv layer's output
# channels, 'M' is a max-pool. Name suffix = conv layers + 3 FC layers.
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], #8 + 3 =11 == vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # 10 + 3 = vgg 13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], #13 + 3 = vgg 16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # 16 +3 =vgg 19
    'custom' : [64,64,64,'M',128,128,128,'M',256,256,256,'M']
}

# NOTE(review): `conv` is not used again in the visible snippets.
conv = make_layers(cfg['custom'], batch_norm=True)

# NOTE(review): the 'custom' config ends with 256 channels, but the VGG class
# shown in this post hard-codes nn.Linear(512 * 7 * 7, 4096) as its first FC
# layer, so the flattened 256 * 7 * 7 features will not fit at forward time
# unless the classifier (or the config) is adjusted - confirm.
CNN = VGG(make_layers(cfg['custom']), num_classes=10, init_weights=True)

-vgg뒤에 붙는 숫자는 어떻게 나오는 거지? conv layer 개수 + classifier의 FC layer 개수(3)

with CIFAR-10

-learning rate를 epoch 진행됨에 따라 줄어들게 할 수 있음

# StepLR multiplies the learning rate by gamma (0.9) every step_size (5)
# scheduler steps; the scheduler is stepped once per epoch below.
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

# training
epochs = 50

for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 30 == 29:    # print every 30 mini-batches
            loss_tracker(loss_plt, torch.Tensor([running_loss/30]), torch.Tensor([i + epoch*len(trainloader) ]))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 30))
            running_loss = 0.0

    # Step the scheduler after this epoch's optimizer updates. Calling it at
    # the top of the epoch (before any optimizer.step()) skips the first
    # value of the schedule and triggers a PyTorch warning.
    lr_sche.step()

10-6 Advanced CNN(ResNet)

-h(x)=x(가장 간단한 형태)가 가장 좋은 성능을 냄

torchvision.models.resnet: input 3x224x224 기준

import torchvision.models.resnet as resnet

def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 convolution.

    The original line was cut off after "strid". It is completed with
    stride=stride and bias=False, mirroring conv3x3.
    """
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: input channels.
        planes: output channels of both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        downsample: optional module applied to the input before the addition.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # first 3x3 conv (stride = `stride`), then second 3x3 conv (stride 1)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Without downsample, a stride of 2 or more makes `out` smaller than
        # the input and the addition below impossible; downsample shrinks
        # the shortcut to match.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class Bottleneck(nn.Module):
    """Residual block: 1x1 reduce -> 3x3 -> 1x1 expand (x `expansion`).

    Args:
        inplanes: input channels.
        planes: channels of the two inner convolutions; the block outputs
            planes * expansion channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before the addition.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv1x1(inplanes, planes)                  # e.g. conv1x1(64, 64)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)            # e.g. conv3x3(64, 64)
        self.bn2 = nn.BatchNorm2d(planes)
        # multiplying by expansion widens the output, e.g. 64 -> 256
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))    # 1x1, stride 1
        out = self.relu(self.bn2(self.conv2(out)))  # 3x3, stride = `stride`
        out = self.bn3(self.conv3(out))             # 1x1, stride 1

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class ResNet(nn.Module):
    """ResNet assembled from `block` instances (input 3x224x224 in the notes).

    Example: ResNet(Bottleneck, [3, 4, 6, 3]) is ResNet-50.

    Args:
        block: residual block class (BasicBlock or Bottleneck).
        layers: number of blocks in each of the four stages.
        num_classes: size of the final fully connected layer.
        zero_init_residual: zero the last BatchNorm weight of every block.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()

        self.inplanes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)

        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # block counts in the trailing comments are the ResNet-50 values
        self.layer1 = self._make_layer(block, 64, layers[0])             # 3
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)  # 4
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)  # 6
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)  # 3

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        # (the paper that gives the justification)
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    # The example values in the comments below are for layer2 of ResNet-50.
    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks and update self.inplanes."""
        downsample = None

        if stride != 1 or self.inplanes != planes * block.expansion:
            # the shortcut needs a projection whenever the stride or the
            # channel count changes, so that `out += identity` lines up
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),  # conv1x1(256, 512, 2)
                nn.BatchNorm2d(planes * block.expansion),                  # BatchNorm2d(512)
            )

        layers = []
        # only the first block of a stage strides / projects
        layers.append(block(self.inplanes, planes, stride, downsample))

        self.inplanes = planes * block.expansion  # self.inplanes = 128 * 4

        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))  # repeated 3 more times

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

def resnet18(pretrained=False, **kwargs):
    """Build ResNet-18; `pretrained` is accepted but not used here.

    Depth: 2 convs * (2+2+2+2) BasicBlocks + conv1 + fc = 16 + 2 = 18.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)

def resnet50(pretrained=False, **kwargs):
    """Build ResNet-50; `pretrained` is accepted but not used here.

    Depth: 3 convs * (3+4+6+3) Bottlenecks + conv1 + fc = 48 + 2 = 50.
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)

def resnet152(pretrained=False, **kwargs):
    """Build ResNet-152; `pretrained` is accepted but not used here.

    Depth: 3 convs * (3+8+36+3) Bottlenecks + conv1 + fc = 150 + 2 = 152.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    return model  # was `return mode`, a NameError

with CIFAR-10

-더 정확한 normalize_mean과 std 직접 계산해서 사용


transform = transforms.Compose([
    transforms.ToTensor()
])

trainset = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=transform)

# The raw image array lives in `.data`; the old `.train_data` attribute no
# longer exists on torchvision's CIFAR10 and raises AttributeError.
print(trainset.data.shape)

# Reduce over every axis except the last, giving one value per channel.
train_data_mean = trainset.data.mean(axis=(0, 1, 2))
train_data_std = trainset.data.std(axis=(0, 1, 2))

print(train_data_mean)
print(train_data_std)

# Divide by 255 to express the statistics on the 0-1 scale.
train_data_mean = train_data_mean / 255
train_data_std = train_data_std / 255

print(train_data_mean)
print(train_data_std)

class ResNet(nn.Module):
    """ResNet variant for 32x32 CIFAR-10 images.

    Compared with the ImageNet version in this post: the stem is a 3x3
    stride-1 convolution with 16 channels, there is no max-pool, and the
    four stages use 16/32/64/128 planes with strides 1/1/2/2.

    Args:
        block: residual block class (BasicBlock or Bottleneck).
        layers: number of blocks in each of the four stages.
        num_classes: size of the final fully connected layer.
        zero_init_residual: zero the last BatchNorm weight of every block.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        #self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 16, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 128, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of `blocks` residual blocks and update self.inplanes."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # project the shortcut when the stride or channel count changes
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        # Shape comments assume a 1x3x32x32 input; E = block.expansion.
        x = self.conv1(x)
        # x.shape = [1, 16, 32, 32]
        x = self.bn1(x)
        x = self.relu(x)
        #x = self.maxpool(x)

        x = self.layer1(x)
        # x.shape = [1, 16*E, 32, 32]
        x = self.layer2(x)
        # x.shape = [1, 32*E, 32, 32]
        x = self.layer3(x)
        # x.shape = [1, 64*E, 16, 16]
        x = self.layer4(x)
        # x.shape = [1, 128*E, 8, 8]

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

# training
epochs = 70

for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 30 == 29:    # print every 30 mini-batches
            value_tracker(loss_plt, torch.Tensor([running_loss/30]), torch.Tensor([i + epoch*len(trainloader) ]))
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 30))
            running_loss = 0.0

    # Step the scheduler after this epoch's optimizer updates. Stepping it
    # before any optimizer.step() skips the first value of the schedule and
    # triggers a PyTorch warning.
    lr_sche.step()

    ##Check Accuracy
    #acc = acc_check(resnet50, testloader, epoch, save=1)
    #value_tracker(acc_plt, torch.Tensor([acc]), torch.Tensor([epoch]))

대면 27 May, 2023

VGG, resNet

colab.research.google.com

댑덥딥 6주차 정리

4rldur0 — Thu, 28 Dec 2023 10:59:13 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 17 May, 2023

10-2 mnist cnn

[총정리]

라이브러리 가져오기

# Lab 11 MNIST and Convolutional Neural Network
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

gpu 사용 설정

# Seed the CPU generator for reproducibility, then pick the device; when a
# GPU is available, seed every CUDA generator as well.
torch.manual_seed(777)
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'

학습에 사용되는 parameter 설정

# parameters
learning_rate = 0.001   # passed to the Adam optimizer
training_epochs = 15    # number of passes over the training loader
batch_size = 100        # samples per mini-batch in the DataLoader

데이터셋 가져오고 loader 만들기

# MNIST dataset
# Both splits are stored under MNIST_data/ (downloaded if missing) and every
# image is converted with ToTensor().
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

# dataset loader
# shuffle=True reshuffles the training set each epoch; drop_last=True drops
# a final batch smaller than batch_size.
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

학습 모델 만들기(torch.nn.module)

# CNN Model (2 conv layers)
class CNN(torch.nn.Module):
    """Two conv/ReLU/max-pool stages followed by one fully connected layer.

    Shapes in the notes' (batch, H, W, C) notation:
        input   (?, 28, 28, 1)
        layer1  conv -> (?, 28, 28, 32), pool -> (?, 14, 14, 32)
        layer2  conv -> (?, 14, 14, 64), pool -> (?, 7, 7, 64)
        fc      7 * 7 * 64 inputs -> 10 outputs (digits 0-9)
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # 3x3 conv keeps the spatial size; the 2x2 pool halves it
            return torch.nn.Sequential(
                torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer1 = conv_stage(1, 32)
        self.layer2 = conv_stage(32, 64)
        # fully connected layer with Xavier-uniform weight initialisation
        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        features = self.layer2(self.layer1(x))
        # print(features.shape) here is a handy way to check the FC input size
        flat = features.view(features.size(0), -1)   # flatten for the FC layer
        return self.fc(flat)

# instantiate CNN model and move its parameters to the selected device
model = CNN().to(device)

loss function(criterion) 선택, 최적화 도구(optimizer) 선택

# define cost/loss & optimizer
# CrossEntropyLoss takes the raw scores from the model's final layer.
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

모델 학습 및 loss check

# train my model
total_batch = len(data_loader)
print('Learning started. It takes sometime.')
for epoch in range(training_epochs):
    avg_cost = 0.0

    for X, Y in data_loader:
        # image is already size of (28x28), no reshape
        # label is not one-hot encoded
        X = X.to(device)    # run the computation on the selected device
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # .item() extracts a plain Python float, so the running average does
        # not keep a chain of autograd nodes alive for the whole epoch
        avg_cost += cost.item() / total_batch

    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

print('Learning Finished!')

학습된 모델 성능 확인

# Test model and check accuracy
model.eval()  # put layers such as Dropout into inference mode
with torch.no_grad():
    # `.data` / `.targets` replace the deprecated `.test_data` /
    # `.test_labels` aliases. The raw pixels are 0-255, so divide by 255 to
    # match the 0-1 range that ToTensor() gave the training batches.
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().div(255).to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

더 많은 layer의 CNN

학습 모델 만들기(torch.nn.module)

# CNN Model
class CNN(torch.nn.Module):
    """Three conv/pool blocks followed by two fully connected layers.

    Takes a batch of 1-channel 28x28 images shaped (N, 1, 28, 28) and
    returns 10 raw class scores per image, shaped (N, 10).
    """

    def __init__(self):
        super(CNN, self).__init__()
        # Probability of keeping a unit; Dropout below receives 1 - keep_prob.
        self.keep_prob = 0.5
        # L1 input   (N, 1, 28, 28)
        #    Conv -> (N, 32, 28, 28)
        #    Pool -> (N, 32, 14, 14)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L2 input   (N, 32, 14, 14)
        #    Conv -> (N, 64, 14, 14)
        #    Pool -> (N, 64, 7, 7)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L3 input   (N, 64, 7, 7)
        #    Conv -> (N, 128, 7, 7)
        #    Pool -> (N, 128, 4, 4)   (padding=1 in the pool turns 7 into 4)
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))

        # L4 FC 4x4x128 inputs -> 625 outputs
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc1,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob))
        # L5 Final FC 625 inputs -> 10 outputs
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Run the network on `x` and return the (N, 10) class scores."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)   # Flatten them for FC
        out = self.layer4(out)
        out = self.fc2(out)
        # The result must be returned; otherwise calling the model yields None.
        return out

# Feed a dummy batch through the model to check the output shape.
# torch.Tensor(...) must be called, not subscripted; the contents are
# uninitialized, which is fine for a shape-only check.
value = torch.Tensor(1, 1, 28, 28)
print(model(value).shape)

⇒ 더 깊어져도 모델의 성능이 떨어질 수 있음

⇒ 효율적으로 만드는 것이 중요!

10-3 visdom

-visdom 설치

pip install visdom

-visdom 서버 켜기

python -m visdom.server

visdom 사용법

import torch
import torch.nn as nn

import torchvision
import torchvision.datasets as dsets

# Start the server first: open a new terminal (Jupyter Notebook > Terminal)
# and run `python -m visdom.server`.
import visdom
vis = visdom.Visdom()

#Text
vis.text("Hello, world!",env="main")

#image
a=torch.randn(3,200,200)
vis.image(a)

#images
vis.images(torch.Tensor(3,3,28,28))

##example (using MNIST and CIFAR10)
# 시간이 좀 걸립니다.
MNIST = dsets.MNIST(root="./MNIST_data",train = True,transform=torchvision.transforms.ToTensor(), download=True)
cifar10 = dsets.CIFAR10(root="./cifar10",train = True, transform=torchvision.transforms.ToTensor(),download=True)
##CIFAR10
data = cifar10[0]    # first sample; data[0] is displayed below
print(data[0].shape)
vis.images(data[0],env="main")

##MNIST
data = MNIST.__getitem__(0)
print(data[0].shape)
vis.images(data[0],env="main")

##Check dataset
data_loader = torch.utils.data.DataLoader(dataset = MNIST,
                                          batch_size = 32,
                                          shuffle = False)
for num, value in enumerate(data_loader):
    value = value[0]
    print(value.shape)
    vis.images(value)
    break
vis.close(env="main")

##Line Plot
Y_data = torch.randn(5)
plt = vis.line (Y=Y_data)
X_data = torch.Tensor([1,2,3,4,5])
plt = vis.line(Y=Y_data, X=X_data)

##Line update
Y_append = torch.randn(1)
X_append = torch.Tensor([6])

vis.line(Y=Y_append, X=X_append, win=plt, update='append')
##multiple Line on single windows
num = torch.Tensor(list(range(0,10)))
num = num.view(-1,1)
num = torch.cat((num,num),dim=1)    # (10, 2): one X column per plotted line

plt = vis.line(Y=torch.randn(10,2), X = num)

##Line info
plt = vis.line(Y=Y_data, X=X_data, opts = dict(title='Test', showlegend=True))
plt = vis.line(Y=Y_data, X=X_data, opts = dict(title='Test', legend = ['1번'],showlegend=True))
plt = vis.line(Y=torch.randn(10,2), X = num, opts=dict(title='Test', legend=['1번','2번'],showlegend=True))

##make function for update line
def loss_tracker(loss_plot, loss_value, num):
    """Append the point (num, loss_value) to the visdom window `loss_plot`.

    `num` and `loss_value` are expected to be Tensors.
    """
    vis.line(Y=loss_value, X=num, win=loss_plot, update='append')
plt = vis.line(Y=torch.Tensor(1).zero_())

for i in range(500):
    loss = torch.randn(1) + i
    loss_tracker(plt, loss, torch.Tensor([i]))

##close the window
vis.close(env="main")

⇒ visdom 왜 씀? → 모델 학습을 시각적으로 확인할 수 있도록 돕는 tool

cf) tensorboardx도 visualization tool로 많이 쓰임

mnist-cnn with visdom

##make plot
loss_plt = vis.line(Y=torch.Tensor(1).zero_(),opts=dict(title='loss_tracker', legend=['loss'], showlegend=True))

##train with loss_tracker
total_batch = len(data_loader)

for epoch in range(training_epochs):
    # Running mean of the per-batch loss for this epoch, kept as a plain float.
    avg_cost = 0.0

    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)

        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # .item() extracts the Python number, so no autograd history is
        # accumulated across iterations.
        avg_cost += cost.item() / total_batch

    print('[Epoch:{}] cost = {}'.format(epoch+1, avg_cost))
    # Plot this epoch's mean loss on the visdom window.
    loss_tracker(loss_plt, torch.Tensor([avg_cost]), torch.Tensor([epoch]))
print('Learning Finished!')

10-4 ImageFolder

? 내가 가진 사진 활용해서 딥러닝 모델 학습하기

나만의 데이터 셋 준비하기

-class마다 다르게 폴더를 만들어 이미지들 넣어놓기

-사진 크기 줄이기

#이미지 보이게 할 때 활용
from matplotlib.pyplot import imshow
%matplotlib inline

torchvision.datasets.ImageFolder으로 불러오기

trans = transforms.Compose([
    transforms.Resize((64,128))
])

train_data = torchvision.datasets.ImageFolder(root='custom_data/origin_data', transform=trans)

transforms 적용하여 저장 하기 origin_data -> train_data

# Save every transformed image into a per-class folder.
for num, value in enumerate(train_data):
    data, label = value
    print(num, data, label)

    # The target folders must already exist.
    if(label == 0):
        data.save('custom_data/train_data/gray/%d_%d.jpeg'%(num, label))
    else:
        data.save('custom_data/train_data/red/%d_%d.jpeg'%(num, label))

CNN모델 생성 및 학습

#모델 저장하고 불러오기
torch.save(net.state_dict(), "./model/model.pth")
new_net = CNN().to(device)
new_net.load_state_dict(torch.load('./model/model.pth'))

Q. 왜 저장해놔야 함? 어디에 저장되는 거임?

#test
trans=torchvision.transforms.Compose([
    transforms.Resize((64,128)),
    transforms.ToTensor()
])
test_data = torchvision.datasets.ImageFolder(root='./custom_data/test_data', transform=trans)
test_set = DataLoader(dataset = test_data, batch_size = len(test_data))
with torch.no_grad():
    for num, data in enumerate(test_set):
        imgs, label = data
        imgs = imgs.to(device)
        label = label.to(device)

        prediction = net(imgs)

        correct_prediction = torch.argmax(prediction, 1) == label

        accuracy = correct_prediction.float().mean()
        print('Accuracy:', accuracy.item())

대면 20 May, 2023

(sum up)mnist cnn

CIFAR-10 이미지 크기에 맞춰 CNN2 다시 설계

3x32x32

self.layer1 = torch.nn.Sequential(
            nn.Conv2d(3,x,kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
    self.layer2 = nn.Sequential(
            nn.Conv2d(x,y,kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
    self.layer3 = nn.Sequential(
            nn.Linear(inputsize_height->(outputsize 식/maxpool1/maxpool2)*inputsize_width->(outputsize 식/maxpool1/maxpool2)*y, 120),
            nn.ReLU(),
            nn.Linear(120,10)
        )

colab.research.google.com

댑덥딥 5주차 정리

4rldur0 — Thu, 28 Dec 2023 10:42:49 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 10 May, 2023

09-3 Dropout

-overfitting 방지

?학습을 진행하면서 각 레이어에 존재하는 노드를 사전에 설정한 비율에 따라 껐다 켰다를 반복함→켜져있는 노드의 가중치만을 사용해 output을 계산함.

dropout = torch.nn.Dropout(p=drop_prob)
model = torch.nn.Sequential(linear1, relu, dropout, ... )

*test할 때는 dropout 사용하지 않음

model.eval()      #dropout = False

09-04 Batch Normalization

Gradient Vanishing/Exploding

Gradient Vanishing? 그래디언트가 작아지면서 소멸하는 현상

Gradient Exploding? vanishing 문제와 반대. 미분을 계산할 때 값이 너무 크거나 NaN 값이 나올 때

→ change activation function, careful initialization, small learning rate(간접적 방식)

→Batch Normalization

Internal Covariate Shift

Covariate Shift? test set와 train set의 분포에 차이가 있기에 발생하는 문제가 있다. 한 레이어의 입력과 출력의 분포에 차이가 있다.

-레이어마다 covariate shift가 있기 때문에 레이어가 깊어질수록 더 큰 변화가 발생

→Batch Normalization? mini-batch마다 normalization을 하는 것

training: x→(m(=sample mean), sigma(=sample variance))normalize→x_hat, backpropagation→(gamma,beta)scale and shift

한 train set 끝나면 sample mean/variance을 learning mean/variance로 고정→ gamma*x_hat+beta 를 적용하여 batch normalize된 input을 활용

⇒batch data 구성이 달라져도 같은 output을 얻을 수 있음

[CNN]

10-1 Convolution

?이미지 위에서 stride 값 만큼 filter(kernel)을 이동시키면서 겹쳐지는 부분의 각 원소의 값을 곱해서 모두 더한 값을 출력으로 하는 연산

stride? filter를 한 번에 얼마나 이동할 것인가

padding: zero padding

Pytorch nn.Conv2d

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
                            dilation=1, groups=1,bias=True)

input type: torch.Tensor, shape:(batch_size, channel, height, width)

output size = (input size-kernel size+(2*padding))/stride+1

-stride로 나눌 때 소수점 나오면 버리고 1 더하기

Neuron과 Convolution

convolution의 filter값이 perceptron의 weight값으로 들어감.

filter가 지나가는 input의 부분부분이 train data set

convolution filter도 bias를 가질 수 있음

Pooling

-이미지 사이즈를 줄이기 위해, fully connected 연산을 대체하기 위해

max pooling: 정해진 사이즈 안에서 가장 큰

average pooling: 정해진 사이즈 안에서 평균

torch.nn.MaxPool2d(kernel_size, stride=None, padding=0,
                            dilation=1, return_indices=False, ceil_mode=False)

input → filter → conv → pool → output

대면 8 April, 2023

dropout, batch normalization

colab.research.google.com

댑덥딥 4주차 정리

4rldur0 — Thu, 28 Dec 2023 10:35:19 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 3 May, 2023

08-1 Perceptron

Perceptron

-인공신경망의 한 종류

인공신경망? Neuron의 동작을 본 따 만든 모델

-x*weight의 합 + bias를 input으로 함. activation function(ex. sigmoid)을 거쳐 output을 만듦

-AND와 OR 문제를 해결하기 위해 만들어짐→XOR 문제는 multilayer가 필요/linear한 classifier로는 불가능

-XOR 문제 해결(단층)

08-2 Multi Layer Perceptron

Multilayer Perceptron

-XOR 문제 해결(multilayer)

-학습할 수 있는 방법이 없었음→backpropagation 알고리즘 개발을 통해 해결

#layer 2개
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)
# nn layers
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)
# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
# cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

#layer 4개
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)
# nn layers
linear1 = torch.nn.Linear(2, 10, bias=True)
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid
...

backpropagation

-출력값과 예측값을 비교→w 업데이트

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)
# nn layers
# torch.Tensor(shape) only allocates memory; its contents are arbitrary and
# may even be NaN/inf, so draw the initial parameters from a normal
# distribution instead.
w1 = torch.randn(2, 2).to(device)
b1 = torch.randn(2).to(device)
w2 = torch.randn(2, 1).to(device)
b2 = torch.randn(1).to(device)
def sigmoid(x):
    """Return the element-wise logistic sigmoid 1 / (1 + exp(-x)) of tensor `x`."""
    neg_exp = torch.exp(-x)
    return 1.0 / (neg_exp + 1.0)
# return torch.div(torch.tensor(1), torch.add(torch.tensor(1.0), torch.exp(-x)))
def sigmoid_prime(x):
    """Return the derivative of the sigmoid at `x`: s(x) * (1 - s(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)

# Forward pass, backward pass and weight update must all happen inside the
# SAME iteration; otherwise the weights never change between forward passes.
# `learning_rate` is expected to be defined before this loop.
for step in range(10001):
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    # binary cross-entropy between the prediction and the target
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))

    # Back prop (chain rule)
    # Loss derivative (1e-7 guards against division by zero)
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    # Layer 2
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
    # Layer 1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)

    # Weight update
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)

    if step % 100 == 0:
        print(step, cost.item())

09-1 ReLU

problem of sigmoid: gradient를 구할 때 문제 발생(vanishing gradient)-양끝에서는 gradient가 0에 가까움

→ReLU? f(x) = max(0,x)

:양수면 자기 자신, 음수면 0을 출력

# Functional forms live in `torch` / `torch.nn.functional`;
# `torch.nn` itself only holds the module classes (torch.nn.Sigmoid, torch.nn.ReLU, ...).
torch.sigmoid(x)
torch.tanh(x)
torch.relu(x)
torch.nn.functional.leaky_relu(x, 0.01)    # small negative slope instead of f(x)=0 for negative inputs

Optimizer in PyTorch

torch.optim.SGD
torch.optim.Adadelta
torch.optim.Adagrad
torch.optim.Adam
torch.optim.SparseAdam
torch.optim.Adamax
torch.optim.ASGD
torch.optim.LBFGS
torch.optim.RMSprop
torch.optim.Rprop

09-2 Weight initialization

상수로 초기화
RBM/DBM
Xavier/He

RBM/DBM

RBM(Restricted Boltzmann Machine): 하나의 layer 안에서는 연결 없음. 다른 layer와는 모두 연결됨

-Pre-training을 통해 실현 X→Y Y→X’

(a) X1→Y2 Y2→X1’

(b) 첫번째 layer의 w를 고정한 후 두번째 layer에 대해 반복 X2→Y3 Y3→X2’

…

-요즘에는 잘 사용하지 않음

Xavier/He

-특성에 따라 초기화를 달리함. 수식에 대입하는 간단한 방법

-Nin: layer의 input 수, Nout: layer의 output 수

Xavier Normal initialization

Xavier Uniform initialization

He initialization: Xavier initialization에서 Nout이 빠진 버전

He Normal initialization

He Uniform initialization

# nn layers
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()
# xavier initialization
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

09-3 Dropout

-overfitting을 해결하기 위한 방법

-한 layer에서 사전에 설정한 확률(~=비율)에 따라 무작위로 택한 일부분의 node만을 활용함

-매번 다른 형태의 network를 만듦

dropout = torch.nn.Dropout(p=drop_prob)

*test 할 때는 dropout을 사용하지 않음.

model.eval()    #=dropout = False

Q. backpropagation = chain rule ?

대면 6 May, 2023
Q. loss 값이 nan이 나오는 이유

perceptron

colab.research.google.com

댑덥딥 3주차 정리

4rldur0 — Thu, 28 Dec 2023 09:25:47 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 19 April, 2023

07-1 Tips

maximum likelihood estimation

likelihood: 가능도

MLE: f(θ)가 최대가 되는 θ(observation을 가장 잘 설명하는 θ)를 찾아내는 과정

ex)베르누이 분포를 따를 때, $f(\theta) = \binom{n}{k}\,\theta^{k}(1-\theta)^{n-k}$

(n과 k는 observation으로 얻어짐)

optimization via gradient descent

f(θ)의 최대를 찾을 때 활용

θ←θ-ɑ*▽L(x;θ)

overfitting and regularization

MLE는 숙명적으로 overfitting이 따름

overfitting: 주어진 데이터에 대해 과도하게 fitting된 상태

-원하는 fitting: 파란색 선

overfitting을 줄이는 방법

1)more data

2)less features

3)regularization

regularization의 종류

1)early stopping: validation Loss가 더이상 낮아지지 않을 때

2)reducing network size

3)weight decay

4)dropout⭐

5)batch normalization⭐

-2)~5) 딥러닝에서 사용

training and test dataset

:overfitting을 최소화하는 방법 중 하나

dev set(validation set)을 통해 training set이 overfitting되었는지 검증(optional)→test set으로 확인

Basic Approach to Train DNN

①make a neural network architecture

②train and check that model is over-fitted

if it is not, increase the model size(deeper and wider)
if it is, add regularization, such as drop-out, batch-normalization

③repeat from step-2

learning rate

learning rate가 너무 크면 cost가 너무 커진다(발산한다)

learning rate가 너무 작으면 cost가 거의 줄어들지 않는다

data preprocessing

1)standardization: 정규분포화

# Standardize each column: subtract its mean and divide by its standard deviation.
mu = x_train.mean(dim=0)
sigma = x_train.std(dim=0)
norm_x_train = (x_train - mu) / sigma

전처리를 안 했다면? y_train의 column 간 데이터의 크기 차이가 크면 크기가 작은 쪽은 거의 무시됨

07-2 MNIST

MNIST: handwritten digits dataset(training set(60,000 장)+test set)

-size: 28x28

-1 channel gray image

-0~9 digits

torchvision

: 유명 데이터셋, 모델 아키텍쳐, transform으로 구성된 패키지

①

import torchvision.datasets as dsets

mnist_train = dsets.MNIST(root="MNIST_data/", train = True, transform=transforms.ToTensor(), download=True)
mnist_test = dsets.MNIST(root="MNIST_data/", train = False, transform=transforms.ToTensor(), download=True)

-pytorch image: channel height wide 순서 v.s. 일반적인 image: height wide channel 순서 → .ToTensor() 활용

② torch.utils.data.DataLoader를 활용해 data 불러옴

댑덥딥 2주차 정리

③ size: 28x28 →view()를 이용해 784로 바꿔줌

for epoch in range(training_epochs):
    for X, Y in data_loader:
        ...
        X = X.view(-1, 28 * 28).to(device)
        ...

full code

##Train
# MNIST data image of shape 28 * 28 = 784 Softmax
linear = torch.nn.Linear(784, 10, bias=True).to(device)
# initialization
torch.nn.init.normal_(linear.weight)
# parameters
training_epochs = 15
batch_size = 100
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device) # Softmax is internally computed.
optimizer = torch.optim.SGD(linear.parameters(), lr=0.1)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)    # labels must be on the same device as the model output
        optimizer.zero_grad()
        hypothesis = linear(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()    # apply the update; without it the weights never change
        # .item() keeps the running mean free of autograd history
        avg_cost += cost.item() / total_batch
    print("Epoch: ", "%04d" % (epoch+1), "cost =", "{:.9f}".format(avg_cost))

##Test
# Test the model using test sets
with torch.no_grad():    # no gradients are needed for evaluation
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    prediction = linear(X_test)
    # predicted class = index of the largest score in each row
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print("Accuracy: ", accuracy.item())

##Visualization
import matplotlib.pyplot as plt
import random
...
r = random.randint(0, len(mnist_test) - 1)
X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 *
28).float().to(device)
Y_single_data = mnist_test.test_labels[r:r + 1].to(device)
print("Label: ", Y_single_data.item())
single_prediction = linear(X_single_data)
print("Prediction: ", torch.argmax(single_prediction,
1).item())
plt.imshow(mnist_test.test_data[r:r + 1].view(28, 28),
cmap="Greys", interpolation="nearest")
plt.show()

대면 22 April, 2023

tips and torchvision

colab.research.google.com

댑덥딥 2주차 정리

4rldur0 — Thu, 28 Dec 2023 09:13:09 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 13 April, 2023

04-1 multivariate linear regression

simple linear regression 복습

-하나의 정보→하나의 결과 예측

H(x) = Wx+b

multivariate linear regression

-여러 개의 정보→하나의 결과 예측

H(x) = w1x1+w2x2+w3x3+b

: x가 길이 1000의 vector라면? 식이 하염없이 길어짐→matmul() 사용

cost 함수, 학습 방식 simple linear regression과 동일

#simple과 다른 부분

#데이터 정의
x_train = torch.FloatTensor([[73, 80, 75],[93, 88, 93], [89, 91, 90], [96, 98, 100], [73, 66, 70]])
y_train = torch.FloatTensor([[152],[185],[180],[196],[142]])
#W 정의
W = torch.zeros((3,1), requires_grad=True)

torch.nn module

-모델 생성을 도와줌

nn.Linear(입력 개수, 출력 개수)

hypothesis 계산은 forward()에서

gradient 계산은 backward()에서_pytorch가 알아서 해줌

F.mse_loss

torch.nn.functional에서 다양한 loss function을 제공

Full code

04-2 Loading Data

-엄청난 양의 데이터를 편하게 다루는 방법

데이터가 많으면? 속도가 느리거나 하드웨어적으로 불가능→일부분의 데이터만 학습

“Minibatch” Gradient Descent

-전체 데이터를 균일하게 나눠 학습하는 방법→업데이트 주기 빨라짐/잘못된 예측을 할 수 있음

데이터 일부만 활용

PyTorch Dataset module

-원하는 데이터셋 지정

len(): 데이터셋의 총 데이터수

getitem(): 특정 인덱스에 상응하는 입출력 데이터 torch.tensor 형태로 반환

DataLoader

-데이터셋 불러오기

batch_size: 통상적으로 2의 제곱수로 설정

shuffle 옵션: 데이터가 학습되는 순서를 바꿈

# DataLoader lives in torch.utils.data and takes the dataset via `dataset=`.
data_loader = torch.utils.data.DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True
)

Full code

05 Logistic Regression

-결과 값이 0 또는 1

-binary classification에서 사용

→ BCE(sigmoid 함수 활용)

computing hypothesis

H(X) = 1/(1+e^(-(XW+b)))

H(x)~=P(x=1;w) = 1-P(x=0;w)

hypothesis = 1/(1+torch.exp(-(x_train.matmul(w)+b)))
#혹은
hypothesis = torch.sigmoid(x_train.matmul(w)+b)

computing cost function

    #bce
F.binary_cross_entropy(hypothesis, y_train)

참고) 로지스틱 회귀에서의 cost function 도출 _https://copycode.tistory.com/162

⇒두 경우를 하나의 식으로 표현하면 위의 cost(W) 식이 됨

Full Code

evaluation

prediction = torch.ByteTensor
prediction = hypothesis >= torch.FloatTensor([0.5])      #0.5보다 크면 1, 작으면 0
correct_prediction = prediction.float() == y_train

higher implementation

Full Code

06 Softmax Classification

-Logistic Regression의 연장선

-multi-class classification 문제에서 사용

-이산확률분포 함수를 근사함

-multinomial classification → CE(softmax)

Softmax

-이전의 사건을 바탕으로 다음 사건이 일어날 확률을 계산

-max값을 뽑는데 soft하게(부드럽게) 뽑음

ex) [1,2,3]에 대해

: 일반적인 max-(0,0,1) / softmax-(0.0900, 0.2447, 0.6652)_합은 1, P(3|1) = 0.6652

-여기에서 ei는 sigmoid 함수로 구한 각각의 값(합이 1이 아님)

Cross Entropy

-두 개의 확률 분포의 유사도를 나타내는 수치

cross entropy를 최소화하면 Q를 P에 근사할 수 있음

Cross Entropy Loss

Low-level Implementation and High-level Implementation

Q(x) = prediction

z = torch.rand(3, 5, requires_grad=True)
hypothesis = F.softmax(z, dim=1)

# Target class index (0..4) for each of the 3 samples.
y = torch.randint(5, (3,)).long()
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

# (low level)
torch.log(F.softmax(z, dim=1))
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()

# or (high level)
F.log_softmax(z, dim=1)
cost = F.nll_loss(F.log_softmax(z, dim=1), y)      # nll: Negative Log Likelihood

# or
cost = F.cross_entropy(z, y)      # F.log_softmax() + F.nll_loss()

Full Code

//low level

//F.cross_entropy

대면 15 April, 2023

multivariate linear regression_logistic regression_softmax classification

colab.research.google.com

댑덥딥 1주차 정리

4rldur0 — Thu, 28 Dec 2023 08:40:04 +0000

'모두를 위한 딥러닝 시즌 2' 강의를 듣고 공부하는 스터디 입니다. https://deeplearningzerotoall.github.io/season2/lec_tensorflow.html

비대면 5 April, 2023

Lab-01-1

basic tensor manipulation

vector(1d)-matrix(2d)-tensor(3d)

pytorch tensor shape convention

-matrix 크기
2d(typical simple setting)
: |t| = (batch size, dim)으로 표현
3d(typical computer vision)
: |t| = (batch size, width, height)
3d(typical natural language processing)
: |t| = (batch size, length, dim)
: dim x length가 하나의 문장

numpy와 pytorch

##numpy
t = np.array([0., 1., 2., 3., 4., 5., 6.])
t.ndim      # rank
t.shape     # shape
t[:2]       # slicing

##pytorch
t = torch.FloatTensor([0., 1., 2., 3., 4., 5., 6.])
t.dim()     # rank (method; t.ndim is the equivalent attribute)
t.shape     # shape (attribute, not callable)
t.size()    # shape
t[:2]       # slicing

t.mean()      //평균
t.mean(dim=0)      //dim=0끼리 평균
t.sum()       //합
t.sum(dim=-1)      //dim=-1끼리 합
t.max()      //최대
t.max(dim=1)      //dim=1 중 최대, argument(index)값을 같이 리턴함

broadcasting

행렬끼리 연산을 수행할 때 크기를 맞춰야 함→ broadcasting 기능을 통해 자동으로 크기가 맞춰짐
자동으로 적용되므로 주의해야 함

#broadcasting
# vector + scalar
m1 = torch.FloatTensor([[1, 2]])      # (1,2)
m2 = torch.FloatTensor([3])      # (1, )->(1,2)
m1+m2

# 2 x 1 vector + 1 x 2 vector
m1 = torch.FloatTensor([[1, 2]])      # (1,2)->(2,2)
m2 = torch.FloatTensor([[3], [4]])      # (2,1)->(2,2); the rows go in ONE nested list

Lab-01-2

view(numpy의 reshape)⭐

-1은 보통 가장 변동이 심한 batch size에

squeeze

어떤 한 dimension의 element가 1일 때, 그 dimension을 없앰
dimension을 명시할 수도 있음

unsqueeze

원하는 dimension에 1을 넣어줌
dimension 명시해야 함
view로 동일한 결과 얻을 수 있음

#view
//|t| = (2,2,3)
t.view([-1,3])      //첫번째 차원 정하지x, 두번째 차원 3으로/(2,2,3)->(2x2, 3)->(4,3)
#squeeze
//|ft| = (3,1)
ft.squeeze()      //(3,1)->(3)
ft.squeeze(dim=0)      //변화 없음
#unsqueeze
ft.unsqueeze(0)      //(3)->(1,3)

type casting

lt = torch.LongTensor([1,2,3,4])      # [1,2,3,4]
lt.float()      # [1., 2., 3., 4.]  (float is a method and must be called)
bt = torch.ByteTensor([True, False, False, True])
bt.long()      # [1,0,0,1]
bt.float()      # [1., 0., 0., 1.]

concatenate(이어붙이기)

x = torch.FloatTensor([[1,2], [3,4]])
y = torch.FloatTensor([[5,6], [7,8]])
torch.cat([x,y], dim=0)      # (2,2)+(2,2)->(4,2)
torch.cat([x,y], dim=1)      # (2,2)+(2,2)->(2,4)

stacking(concatenate를 간편화시킨 것)

unsqueeze한 리스트로 cat하여 똑같은 결과 낼 수 있음

//|x|, |y|, |z| = (2, )
torch.stack([x, y,z])      //(2,)+(2,)+(2,)->(3,2)
torch.stack([x, y,z], dim=1)      //(2,)+(2,)+(2,)->(2,3)

ones and zeros

똑같은 shape의 1로 채운/0으로 채운 tensor를 만듦
device 같음

//|x|=(3,2)
torch.ones_like(x)
torch.zeros_like(x)

in-place operation

‘_’
메모리를 새로 선언하지 않고 결과값을 리스트에 반영함
garbage collector가 잘 되어 있으므로 큰 차이 없음

x.mul(2.)
x.mul_(2.)

Lab-02, Lab-03

#data definition
x_train = torch.FloatTensor([])      //입력
y_train = torch.FloatTensor([])      //출력

#hypothesis

//hypothesis function: y=Wx+b: 학습 데이터와 가장 잘 맞는 하나의 직선
//w, b 0으로 초기화
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

hypothesis = x_train * W +b

#compute loss

# uses MSE (mean squared error)

cost = torch.mean((hypothesis - y_train)**2)      # mean of (prediction - target) squared

#gradient descent
# Assume a model without b (the best model is W=1, where cost=0)
# gradient descent: W = W - lr * grad(W)
gradient = 2 * torch.mean((W * x_train - y_train) * x_train)
lr = 0.1
# W was created with requires_grad=True, so a manual in-place update has to
# run with autograd disabled.
with torch.no_grad():
    W -= lr * gradient

# torch.optim provides gradient descent without writing the update by hand
optimizer = optim.SGD([W], lr=0.15)

# improve H(x) using the cost
optimizer.zero_grad()      # reset gradients to 0
cost.backward()      # compute gradients
optimizer.step()      # apply the gradient descent update (must be called)

-이를 여러 번의 iteration을 돌려 cost=0에 가까워지도록 함

대면 8 April, 2023

gradient descent and linear regression

colab.research.google.com