DEV Community

Super Kai (Kazuya Ito)
Super Kai (Kazuya Ito)

Posted on • Edited on

RandomCrop in PyTorch (1)

Buy Me a Coffee

*Memos:

RandomCrop() can randomly crop an image as shown below. *It's about size argument:

*Memos:

  • The 1st argument for initialization is size(Required-Type:int or tuple/list(int) or size()): *Memos:
    • It's [height, width].
    • It must be 1 <= x.
    • A tuple/list must be the 1D with 1 or 2 elements.
    • A single value(int or tuple/list(int)) means [size, size].
  • The 2nd argument for initialization is padding(Optional-Default:None-Type:int or tuple/list(int)): *Memos:
    • It's [left, top, right, bottom] which can be converted from [left-right, top-bottom] or [left-top-right-bottom].
    • A tuple/list must be the 1D with 1, 2 or 4 elements.
    • A single value(int or tuple/list(int)) means [padding, padding, padding, padding].
    • Double values(tuple/list(int)) means [padding[0], padding[1], padding[0], padding[1]].
  • The 3rd argument for initialization is pad_if_needed(Optional-Default:False-Type:bool):
    • If it's False and size is smaller than an original image or the padded image by padding, there is error.
    • If it's True and size is smaller than an original image or the padded image by padding, there is no error, then the image is randomly padded to become size.
  • The 4th argument for initialization is fill(Optional-Default:0-Type:int, float or tuple/list(int or float)): *Memos:
    • It can change the background of an image. *The background can be seen when an image is positively padded.
    • A tuple/list must be the 1D with 1 or 3 elements.
    • If all values are x <= 0, it's black.
  • The 5th argument for initialization is padding_mode(Optional-Default:'constant'-Type:str). *'constant', 'edge', 'reflect' or 'symmetric' can be set to it.
  • The 1st argument is img(Required-Type:PIL Image or tensor(int)): *Memos:
    • A tensor must be 2D or 3D.
    • Don't use img=.
  • v2 is recommended to use according to V1 or V2? Which one should I use?.
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import RandomCrop

rc = RandomCrop(size=100)
rc = RandomCrop(size=100,
                padding=None,
                pad_if_needed=False, 
                fill=0,
                padding_mode='constant')
rc
# RandomCrop(size=(100, 100),
#            pad_if_needed=False,
#            fill=0,
#            padding_mode=constant)

rc.size
# (100, 100)

print(rc.padding)
# None

rc.pad_if_needed
# False

rc.fill
# 0

rc.padding_mode
# 'constant'

origin_data = OxfordIIITPet(
    root="data",
    transform=None
)

s300_data = OxfordIIITPet( # `s` is size.
    root="data",
    transform=RandomCrop(size=300)
    # transform=RandomCrop(size=[300, 300])
)

s200_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=200)
)

s100_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=100)
)

s50_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=50)
)

s10_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=10)
)

s1_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=1)
)

s200_300_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=[200, 300])
)

s300_200_data = OxfordIIITPet(
    root="data",
    transform=RandomCrop(size=[300, 200])
)

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    for i in range(1, 6):
        plt.subplot(1, 5, i)
        plt.imshow(X=data[0][0])
    plt.tight_layout()
    plt.show()

plt.figure(figsize=[7, 9])
plt.title(label="s500_394origin_data", fontsize=14)
plt.imshow(X=origin_data[0][0])
show_images1(data=origin_data, main_title="s500_394origin_data")
show_images1(data=s300_data, main_title="s300_data")
show_images1(data=s200_data, main_title="s200_data")
show_images1(data=s100_data, main_title="s100_data")
show_images1(data=s50_data, main_title="s50_data")
show_images1(data=s10_data, main_title="s10_data")
show_images1(data=s1_data, main_title="s1_data")
show_images1(data=s200_300_data, main_title="s200_300_data")
show_images1(data=s300_200_data, main_title="s300_200_data")

# ↓ ↓ ↓ ↓ ↓ ↓ The code below is identical to the code above. ↓ ↓ ↓ ↓ ↓ ↓
def show_images2(data, main_title=None, s=None, p=None,
                 pin=False, f=0, pm='constant'):
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    temp_s = s
    im = data[0][0]
    for i in range(1, 6):
        plt.subplot(1, 5, i)
        if not temp_s:
            s = [im.size[1], im.size[0]]
        rc = RandomCrop(size=s, padding=p, pad_if_needed=pin,
                        fill=f, padding_mode=pm)
        plt.imshow(X=rc(im))
    plt.tight_layout()
    plt.show()

plt.figure(figsize=[7, 9])
plt.title(label="s500_394origin_data", fontsize=14)
plt.imshow(X=origin_data[0][0])
show_images2(data=origin_data, main_title="s500_394origin_data")
show_images2(data=origin_data, main_title="s300_data", s=300)
show_images2(data=origin_data, main_title="s200_data", s=200)
show_images2(data=origin_data, main_title="s100_data", s=100)
show_images2(data=origin_data, main_title="s50_data", s=50)
show_images2(data=origin_data, main_title="s10_data", s=10)
show_images2(data=origin_data, main_title="s1_data", s=1)
show_images2(data=origin_data, main_title="s200_300_data", s=[200, 300])
show_images2(data=origin_data, main_title="s300_200_data", s=[300, 200])
Enter fullscreen mode Exit fullscreen mode

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Top comments (0)

Billboard image

The Next Generation Developer Platform

Coherence is the first Platform-as-a-Service you can control. Unlike "black-box" platforms that are opinionated about the infra you can deploy, Coherence is powered by CNC, the open-source IaC framework, which offers limitless customization.

Learn more