Module in PyTorch

Module() can create a model by being used as its base class, as shown below:

*Memos:

  • forward() must be overridden in the subclass of Module().
  • state_dict() can return a dictionary containing references to the whole state of the module. *It doesn't include num3 and num4 because they are defined without Parameter().
  • parameters() can return an iterator over module parameters. *It doesn't include num3 and num4 because they are defined without Parameter().
  • training can check whether the model is in train mode or eval mode. By default, it's train mode.
  • train() can set a model to train mode.
  • eval() can set a model to eval mode.
  • cpu() can move all model parameters and buffers to the CPU.
  • cuda() can move all model parameters and buffers to the GPU (CUDA). *It requires a CUDA-capable GPU.


import torch
from torch import nn

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.num1 = nn.Parameter(torch.tensor(9.))
        self.num2 = nn.Parameter(torch.tensor(7.))
        self.num3 = torch.tensor(-2.) # Defined without `Parameter()`
        self.num4 = torch.tensor(6.) # Defined without `Parameter()`
        self.layer1 = nn.Linear(in_features=4, out_features=5)
        self.layer2 = nn.Linear(in_features=5, out_features=2)
        self.layer3 = nn.Linear(in_features=2, out_features=3)
        self.relu = nn.ReLU()

    def forward(self, x): # Must be overridden
        x1 = self.layer1(input=x)
        x2 = self.relu(input=x1)
        x3 = self.layer2(input=x2)
        x4 = self.relu(input=x3)
        x5 = self.layer3(input=x4)
        return x5

my_tensor = torch.tensor([8., -3., 0., 1.])

torch.manual_seed(42)

mymodel = MyModel()
mymodel(x=my_tensor)
# tensor([0.8092, 0.8460, 0.3758], grad_fn=<ViewBackward0>)
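
Calling mymodel(x=my_tensor) goes through Module.__call__(), which runs forward() together with any registered hooks, so calling the model directly is preferred over calling forward() yourself. A minimal check with the model above:

mymodel.forward(x=my_tensor)
# Same result as mymodel(x=my_tensor) above, but any registered hooks are skipped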

mymodel
# MyModel(
#   (layer1): Linear(in_features=4, out_features=5, bias=True)
#   (layer2): Linear(in_features=5, out_features=2, bias=True)
#   (layer3): Linear(in_features=2, out_features=3, bias=True)
#   (relu): ReLU()
# )

mymodel.layer2
# Linear(in_features=5, out_features=2, bias=True)

mymodel.state_dict()
# OrderedDict([('num1', tensor(9.)),
#              ('num2', tensor(7.)),
#              ('layer1.weight',
#               tensor([[0.3823, 0.4150, -0.1171, 0.4593],
#                       [-0.1096, 0.1009, -0.2434, 0.2936],
#                       [0.4408, -0.3668, 0.4346, 0.0936],
#                       [0.3694, 0.0677, 0.2411, -0.0706],
#                       [0.3854, 0.0739, -0.2334, 0.1274]])),
#              ('layer1.bias',
#               tensor([-0.2304, -0.0586, -0.2031, 0.3317, -0.3947])),
#              ('layer2.weight',
#               tensor([[-0.2062, -0.1263, -0.2689, 0.0422, -0.4417],
#                       [0.4039, -0.3799, 0.3453, 0.0744, -0.1452]])),
#              ('layer2.bias', tensor([0.2764, 0.0697])),
#              ('layer3.weight',
#               tensor([[0.5713, 0.0773],
#                       [-0.2230, 0.1900],
#                       [-0.1918, 0.2976]])),
#              ('layer3.bias', tensor([0.6313, 0.4087, -0.3091]))])
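
As the memos above say, num3 and num4 don't appear in state_dict() because they are plain tensors, not Parameter()s. A minimal check with the model above:

'num1' in mymodel.state_dict()
# True

'num3' in mymodel.state_dict()
# False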

params = mymodel.parameters()

next(params)
# Parameter containing:
# tensor(9., requires_grad=True)

next(params)
# Parameter containing:
# tensor(7., requires_grad=True)

next(params)
# Parameter containing:
# tensor([[0.3823, 0.4150, -0.1171, 0.4593],
#         [-0.1096, 0.1009, -0.2434, 0.2936],
#         [0.4408, -0.3668, 0.4346, 0.0936],
#         [0.3694, 0.0677, 0.2411, -0.0706],
#         [0.3854, 0.0739, -0.2334, 0.1274]], requires_grad=True)

next(params)
# Parameter containing:
# tensor([-0.2304, -0.0586, -0.2031, 0.3317, -0.3947],
#        requires_grad=True)

next(params)
# Parameter containing:
# tensor([[-0.2062, -0.1263, -0.2689, 0.0422, -0.4417],
#         [0.4039, -0.3799, 0.3453, 0.0744, -0.1452]],
#        requires_grad=True)

next(params)
# Parameter containing:
# tensor([0.2764, 0.0697], requires_grad=True)

next(params)
# Parameter containing:
# tensor([[0.5713, 0.0773],
#         [-0.2230, 0.1900],
#         [-0.1918, 0.2976]], requires_grad=True)

next(params)
# Parameter containing:
# tensor([0.6313, 0.4087, -0.3091], requires_grad=True)
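
One more next(params) would raise StopIteration because all 8 parameters have been consumed. named_parameters() can return the same parameters together with their names, which makes it easy to see what's registered; a minimal sketch with the model above:

[name for name, param in mymodel.named_parameters()]
# ['num1', 'num2',
#  'layer1.weight', 'layer1.bias',
#  'layer2.weight', 'layer2.bias',
#  'layer3.weight', 'layer3.bias']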

mymodel.training
# True

mymodel.eval()

mymodel.training
# False

mymodel.train()

mymodel.training
# True
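
train() also takes a mode argument (True by default), so train(mode=False) is equivalent to eval(). A minimal sketch with the model above:

mymodel.train(mode=False)

mymodel.training
# False

mymodel.train()

mymodel.training
# True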

mymodel.cuda(device='cuda:0')

mymodel.layer2.weight.device
# device(type='cuda', index=0)

mymodel.cpu()

mymodel.layer2.weight.device
# device(type='cpu')
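
to() can move a model in the same way, taking the target device directly. The sketch below assumes a CUDA-capable GPU is available, the same as cuda() above:

mymodel.to(device='cuda:0')

mymodel.layer2.weight.device
# device(type='cuda', index=0)

mymodel.to(device='cpu')

mymodel.layer2.weight.device
# device(type='cpu')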

