การแบ่งส่วนภาพ โดยใช้ TensorFlow

#python #tensorflow #ai

การแบ่งส่วนภาพ (Image Segmentation) คือการให้เครื่องกำหนดคลาสให้กับแต่ละ pixel ของรูปภาพที่ Input เข้ามา ซึ่ง Output จะเป็น mask ที่วาดขอบเขตของวัตถุในรูปภาพ ทำให้สามารถนำไปใช้ได้อย่างกว้างขวางและหลากหลาย เช่น การวิเคราะห์ภาพทางการแพทย์ รถยนต์ขับเคลื่อนอัตโนมัติ การวิเคราะห์ภาพจากดาวเทียม และอีกมากมาย เป้าหมายหลักของการแบ่งส่วนภาพ คือ เพื่อที่จะจดจำและทำความเข้าใจสิ่งที่อยู่ในรูปภาพในระดับ pixel

ข้อมูลรูปภาพที่จะนำมาแบ่งส่วนภาพ ใช้ชุดข้อมูล "The Oxford-IIIT Pet Dataset" ซึ่งเป็นข้อมูลรูปภาพสัตว์เลี้ยงทั้งหมด 37 หมวดหมู่ (สายพันธุ์) โดยมีรูปภาพประมาณ 200 รูปสำหรับแต่ละคลาส รูปภาพแต่ละภาพมีขนาด ท่าทาง และแสงที่แตกต่างกัน รูปภาพทั้งหมดมีคำอธิบายประกอบ (annotation) เกี่ยวกับสายพันธุ์ และการแบ่งส่วนภาพในระดับ pixel

ขั้นตอนการทำ

1.นำเข้า library เข้าไปใน google colab

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
from tensorflow import keras
import tensorflow as tf
import tensorflow_datasets as tfds
import cv2
import PIL
from IPython.display import clear_output

2.Download ชุดข้อมูล Tensorflow โดยจะใช้คำสั่งดังนี้เพื่อ Download เข้างาน

# Load the Oxford-IIIT Pet dataset (any 3.x.x version) through TFDS;
# with_info=True makes the call also return the metadata object `info`.
dataset, info = tfds.load('oxford_iiit_pet:3.*.*', with_info=True)

3.การประมวลผลของข้อมูล
เริ่มจากปรับค่า pixels ของรูปภาพให้อยู่ในช่วง [0,1] โดยการหารค่า pixels ด้วย 255 และลบค่าใน mask ลง 1 เพื่อให้หมายเลขคลาสเริ่มนับจาก 0

def normalize(input_image, input_mask):
    """Rescale an image by 1/255 and shift its mask labels down by one.

    Args:
        input_image: image tensor; cast to float32 and divided by 255
            (values land in [0, 1] assuming 0-255 pixel input).
        input_mask: segmentation mask; every label is decremented by 1.

    Returns:
        Tuple ``(image, mask)`` holding the rescaled image and the
        shifted mask.
    """
    scaled_image = tf.cast(input_image, dtype=tf.float32) / 255.0
    input_mask -= 1
    return scaled_image, input_mask

@tf.function
def load_train_ds(dataset):
    """Prepare one training example: resize, randomly flip, normalize.

    Args:
        dataset: a single dataset element, indexed with the keys
            'image' and 'segmentation_mask'.

    Returns:
        Tuple ``(img, mask)`` as returned by ``normalize``.
    """
    img = tf.image.resize(dataset['image'],
                          size=(width, height))
    # Masks hold discrete class ids: nearest-neighbour resizing keeps them
    # intact, whereas the default bilinear mode would blend neighbouring
    # labels into fractional values that are not valid classes.
    mask = tf.image.resize(dataset['segmentation_mask'],
                           size=(width, height),
                           method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # Augmentation: mirror image and mask together about half of the time
    # so they stay aligned.
    if tf.random.uniform(()) > 0.5:
        img = tf.image.flip_left_right(img)
        mask = tf.image.flip_left_right(mask)

    img, mask = normalize(img, mask)
    return img, mask

@tf.function
def load_test_ds(dataset):
    """Prepare one test example: resize and normalize, no augmentation.

    Args:
        dataset: a single dataset element, indexed with the keys
            'image' and 'segmentation_mask'.

    Returns:
        Tuple ``(img, mask)`` as returned by ``normalize``.
    """
    img = tf.image.resize(dataset['image'],
                          size=(width, height))
    # Masks hold discrete class ids: nearest-neighbour resizing keeps them
    # intact, whereas the default bilinear mode would blend neighbouring
    # labels into fractional values that are not valid classes.
    mask = tf.image.resize(dataset['segmentation_mask'],
                           size=(width, height),
                           method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    img, mask = normalize(img, mask)
    return img, mask

ต่อมาเราจะมากำหนดตัวแปรให้เป็นค่าคงที่ เช่น ขนาดของ buffer ความกว้าง และความสูงของข้อมูล Input

# Number of examples in the training split, read from the dataset metadata
TRAIN_LENGTH = info.splits['train'].num_examples

# Batch size is the number of examples processed in one training step.
# It is usually chosen as a power of 2
BATCH_SIZE = 64
# Size of the buffer used when shuffling the training data
BUFFER_SIZE = 1000
# Number of full batches that fit in one pass over the training split
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE

# For VGG16 this is the input size
width, height = 224, 224

ในตอนนี้เราจะเตรียมข้อมูลสำหรับเทรนและทดสอบเก็บไว้ในตัวแปรต่างๆ พร้อมทั้งทำการเพิ่มข้อมูล (data augmentation) ให้เสร็จสิ้น

# Apply the preprocessing functions to the raw splits; the training map
# lets tf.data pick the degree of parallelism (AUTOTUNE).
train = dataset['train'].map(
    load_train_ds, num_parallel_calls=tf.data.AUTOTUNE)
test = dataset['test'].map(load_test_ds)

# Training pipeline: cache -> shuffle -> batch -> repeat, then prefetch.
# NOTE(review): cache() comes after the map that applies the random flip,
# so the flip decision is presumably frozen after the first pass instead
# of being re-drawn every epoch - confirm, and consider caching before
# the augmentation step.
train_ds = train.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
# The test pipeline is only batched.
test_ds = test.batch(BATCH_SIZE)

4.เเสดงข้อมูล
แสดงภาพตัวอย่างและ mask ที่เกี่ยวข้องจากชุดข้อมูล

 def display_images(display_list):
    plt.figure(figsize=(15, 15))
    title = ['Input Image', 'True Mask', 
             'Predicted Mask']

    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(title[i])
        plt.imshow(keras.preprocessing.image.array_to_img(display_list[i]))
        plt.axis('off')

    plt.show()


# Step through up to five training examples; each iteration overwrites
# the sample, so only the last one taken remains afterwards.
for img, mask in train.take(5):
    sample_image, sample_mask = img, mask
    display_list = sample_image, sample_mask

# Show the retained sample image next to its mask.
display_images(display_list)

Output

จากนั้นจะใช้ encoder จาก VGG16 ที่ได้รับการฝึกฝนด้วย ImageNet มาแล้ว เพื่อช่วยลดเวลาในการฝึกโมเดลแบ่งส่วนภาพ (ในที่นี้ใช้ decoder แบบ FCN-8) และสามารถให้ผลลัพธ์ที่ดีโดยใช้เวลาที่น้อยลง

# VGG16 without its top (classification) layers serves as the encoder.
base_model = keras.applications.vgg16.VGG16(
    input_shape=(width, height, 3),
    include_top=False,
)

# Pooling layers whose outputs are handed on to the decoder.
layer_names = [
    'block1_pool', 'block2_pool', 'block3_pool',
    'block4_pool', 'block5_pool',
]
base_model_outputs = [
    layer.output for layer in map(base_model.get_layer, layer_names)
]
# Freeze the encoder weights.
base_model.trainable = False

# Encoder model: one input, five feature-map outputs.
VGG_16 = tf.keras.models.Model(
    base_model.input,
    base_model_outputs,
)

เเล้วทำการกำหนด decoder

def fcn8_decoder(convs, n_classes):
    """Build the FCN-8 style decoder on top of five encoder feature maps.

    Args:
        convs: sequence of exactly five encoder outputs; only the last
            three (pool 3, pool 4 and pool 5) are used.
        n_classes: number of output channels, one per class.

    Returns:
        Output tensor after a final softmax activation.
    """
    layers = tf.keras.layers
    _, _, pool3, pool4, pool5 = convs

    # Two convolutions on the deepest feature map.
    wide_filters = 4096
    x = layers.Conv2D(wide_filters, (7, 7), activation='relu',
                      padding='same', name="conv6")(pool5)
    x = layers.Conv2D(wide_filters, (1, 1), activation='relu',
                      padding='same', name="conv7")(x)

    # First 2x upsampling, then crop the extra border pixels it adds.
    x = layers.Conv2DTranspose(n_classes, kernel_size=(4, 4),
                               strides=(2, 2), use_bias=False)(x)
    x = layers.Cropping2D(cropping=(1, 1))(x)

    # Project pool 4 to n_classes channels with a 1x1 convolution and
    # merge it with the upsampled tensor.
    skip = layers.Conv2D(n_classes, (1, 1), activation='relu',
                         padding='same')(pool4)
    x = layers.Add()([x, skip])

    # Second 2x upsampling with the same border crop.
    x = layers.Conv2DTranspose(n_classes, kernel_size=(4, 4),
                               strides=(2, 2), use_bias=False)(x)
    x = layers.Cropping2D(cropping=(1, 1))(x)

    # Same projection and merge for pool 3.
    skip = layers.Conv2D(n_classes, (1, 1), activation='relu',
                         padding='same')(pool3)
    x = layers.Add()([x, skip])

    # Final 8x upsampling back up to the size of the original image.
    x = layers.Conv2DTranspose(n_classes, kernel_size=(8, 8),
                               strides=(8, 8), use_bias=False)(x)

    # Softmax turns the scores into class probabilities.
    return layers.Activation('softmax')(x)

เอาทุกอย่างที่ได้กำหนดเข้ามาด้วยกันเเละสร้างโมเดลขึ้นมา

def segmentation_model():
    """Assemble the network: VGG16 encoder followed by the FCN-8 decoder.

    Returns:
        A ``tf.keras.Model`` taking a (width, height, 3) input and
        producing the decoder output for 3 classes.
    """
    image_input = keras.layers.Input(shape=(width, height, 3))
    encoder_features = VGG_16(image_input)
    class_scores = fcn8_decoder(encoder_features, 3)
    return tf.keras.Model(inputs=image_input, outputs=class_scores)


opt = keras.optimizers.Adam()

model = segmentation_model()
# The decoder already ends in a softmax activation, so the loss receives
# probabilities rather than raw logits: from_logits must be False,
# otherwise a second softmax would be applied inside the loss.
model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(
                  from_logits=False),
              metrics=['accuracy'])

5.ทำการสร้าง prediction mask utility
จะทำการสร้าง function ที่จะเเสดงให้เห็นภาพจริงเเละ mask ที่ถูกต้องเเละ mask ที่ predicted ในเเถวเดียวกัน

def create_mask(pred_mask):
    """Turn batched class scores into a label mask for the first example.

    Args:
        pred_mask: model output with the class scores on the last axis.

    Returns:
        The arg-max class index per position for the first batch
        element, with a trailing axis of size 1 appended.
    """
    class_ids = tf.argmax(pred_mask, axis=-1)
    return class_ids[..., tf.newaxis][0]


def show_predictions(dataset=None, num=1):
    """Display input image, true mask and predicted mask in one row.

    Args:
        dataset: optional batched dataset of ``(image, mask)`` pairs;
            when omitted, the module-level sample image/mask are used.
        num: number of batches to take from ``dataset``; the first
            example of each batch is shown.
    """
    # Explicit None check instead of relying on the truthiness of a
    # dataset object.
    if dataset is not None:
        for image, mask in dataset.take(num):
            pred_mask = model.predict(image)
            display_images([image[0], mask[0], create_mask(pred_mask)])
    else:
        # The sample is a single image: add a batch axis for predict().
        display_images([sample_image, sample_mask,
                        create_mask(model.predict(sample_image[tf.newaxis, ...]))])


# No dataset given: predicts on the stored sample image with the model
# as it is at this point (before model.fit is called).
show_predictions()

Output

6.การฝึกฝนโมเดล
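โดยเราจะตั้งค่าให้ฝึกฝนโมเดลทั้งหมด 20 รอบ (epochs) และกำหนดจำนวนขั้นตอนการตรวจสอบ (validation steps) ต่อรอบ โดยนำจำนวนข้อมูลทดสอบมาหารด้วยขนาด batch แล้วหารด้วย 5 (VAL_SUBSPLITS) อีกครั้ง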

# Training schedule
EPOCHS = 20
VAL_SUBSPLITS = 5
# Validate on a fraction (1 / VAL_SUBSPLITS) of the full test batches.
VALIDATION_STEPS = (
    info.splits['test'].num_examples // BATCH_SIZE // VAL_SUBSPLITS
)

model_history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
)

Output

7.ใช้ Metrics ในการประเมินผลของโมเดล
สำหรับงานแบ่งส่วนภาพนั้นจะมีการใช้เมตริกทั้งหมด 2 ประเภท คือ 1.Intersection over Union (IOU) และ 2.Dice score

def compute_metrics(y_true, y_pred, n_classes=3):
    '''
    Computes the class-wise IOU and Dice Score of two label maps.

    Args:
    y_true (array) - ground truth label map
    y_pred (array) - predicted label map
    n_classes (int) - number of class ids (0 .. n_classes - 1) to score

    Returns:
    (class_wise_iou, class_wise_dice_score) - two lists holding one
    value per class id
    '''
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    class_wise_iou = []
    class_wise_dice_score = []

    # Keeps both ratios defined when a class is absent from both maps.
    smoothening_factor = 0.00001

    for i in range(n_classes):
        true_region = (y_true == i)
        pred_region = (y_pred == i)

        intersection = np.sum(pred_region * true_region)
        combined_area = np.sum(true_region) + np.sum(pred_region)

        iou = (intersection + smoothening_factor) / \
            (combined_area - intersection + smoothening_factor)
        class_wise_iou.append(iou)

        # The smoothing term is added after doubling the intersection;
        # doubling the smoothed ratio would give 2.0 (instead of 1.0) for
        # a class that appears in neither map.
        dice_score = (2 * intersection + smoothening_factor) / \
            (combined_area + smoothening_factor)
        class_wise_dice_score.append(dice_score)

    return class_wise_iou, class_wise_dice_score

8.ดึงรูปภาพทดสอบออกมาทำนาย แล้วแสดงผลลัพธ์พร้อมค่า Metrics

def get_test_image_and_annotation_arrays():
    '''
    Collects the test dataset into NumPy arrays.

    Returns:
    (images, y_true_segments) - the input images and their segmentation
    masks, truncated to a whole number of BATCH_SIZE-sized batches
    '''
    total_examples = info.splits['test'].num_examples

    # Re-batch the test split so that a single batch holds every example.
    whole_split = test_ds.unbatch().batch(total_examples)

    images = []
    y_true_segments = []
    for image, annotation in whole_split.take(1):
        y_true_segments = annotation.numpy()
        images = image.numpy()

    # Drop the trailing examples that would not fill a complete batch.
    usable = total_examples - (total_examples % BATCH_SIZE)
    return images[:usable], y_true_segments[:usable]


# Materialise the test images and masks as arrays
y_true_images, y_true_segments = get_test_image_and_annotation_arrays()

# Index of the test example to inspect
integer_slider = 2574
# Add a leading batch axis of size 1 before calling predict()
img = np.reshape(y_true_images[integer_slider], (1, width, height, 3))
y_pred_mask = model.predict(img)
y_pred_mask = create_mask(y_pred_mask)
# Bare expression: presumably meant to echo the shape in a notebook cell
y_pred_mask.shape


def display_prediction(display_list, display_string):
    """Plot images in one row and caption the second one with metrics.

    Args:
        display_list: sequence of up to three image-like arrays, titled
            in order 'Input Image', 'True Mask', 'Predicted Mask'.
        display_string: text placed as the x-axis label under the
            second panel.
    """
    title = ['Input Image', 'True Mask', 'Predicted Mask']
    panel_count = len(display_list)
    plt.figure(figsize=(15, 15))

    for idx, item in enumerate(display_list):
        plt.subplot(1, panel_count, idx + 1)
        plt.title(title[idx])
        # Hide the tick marks on both axes.
        plt.xticks([])
        plt.yticks([])
        if idx == 1:
            plt.xlabel(display_string, fontsize=12)
        plt.imshow(keras.preprocessing.image.array_to_img(item))
    plt.show()


# Score the selected prediction against its ground-truth mask.
iou, dice_score = compute_metrics(
    y_true_segments[integer_slider], y_pred_mask.numpy())
display_list = [
    y_true_images[integer_slider],
    y_true_segments[integer_slider],
    y_pred_mask,
]


# Build one caption line per class, metrics rounded to 4 decimals.
class_names = ['pet', 'background', 'outline']
display_string_list = [
    "{}: IOU: {} Dice Score: {}".format(label, class_iou, class_dice)
    for label, class_iou, class_dice in zip(
        class_names, np.round(iou, 4), np.round(dice_score, 4))
]
display_string = "\n\n".join(display_string_list)


# showing predictions with metrics
display_prediction(display_list, display_string)

Output

สรุปผล

จากผลการทดลองที่ใช้ TensorFlow ในการทำ Image Segmentation โดยใช้ชุดข้อมูลสัตว์เลี้ยงของ Oxford-IIIT ทำให้สามารถแบ่งส่วนภาพได้เป็นอย่างดี สามารถนำโมเดลไปต่อยอดสู่งาน Project อื่นๆ หรือนำไปใช้สร้างโมเดลใหม่ที่ดีขึ้นในอนาคต

เเหล่งอ้างอิง
1.https://www.geeksforgeeks.org/image-segmentation-using-tensorflow/
2.https://www.tensorflow.org/tutorials/images/segmentation?hl=th