Keras implementation of the DeepLabv3+ semantic segmentation neural network

Keras implementation of the DeepLabv3+ semantic segmentation neural network

network structure

The DeepLab family of models builds on ResNet residual modules and combines them with atrous (dilated, sometimes called "hole") convolution. Compared with DeepLab v3, DeepLab v3+ introduces the encoder-decoder structure commonly used in semantic segmentation [25] [26] in order to fuse multi-scale information. The encoder provides rich semantic information and the decoder restores fine object edges; combining low-level and high-level features in this way further improves the accuracy of segmentation boundaries. At the same time, the resolution of the features extracted by the encoder can be controlled arbitrarily, and atrous convolution allows accuracy to be traded off against running time.

The structure of the DeepLab v3+ model is shown in Figure 3-6. The main body of its encoder is a DCNN (deep convolutional neural network) with atrous convolution, which is one of the key ideas of the DeepLab models. Because a dilated kernel skips over pixels when it samples the feature map, it enlarges the receptive field without changing the size of the feature map, so each convolution output covers a wider range of information. For the encoder, this helps to extract more effective features and multi-scale information.

In addition, the DeepLab v3+ model uses the ASPP (atrous spatial pyramid pooling) module to further extract multi-scale features by using different receptive fields and upsampling.

Complete code

Import libraries

from keras.preprocessing import image
from keras.models import Model, load_model, Sequential
from keras import backend as K
from keras.utils import np_utils
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelEncoder
from keras import metrics
from keras.losses import binary_crossentropy

import matplotlib.pyplot as plt
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization, Reshape, Permute, Activation, Input
from keras.layers import DepthwiseConv2D, ZeroPadding2D, GlobalAveragePooling2D, Lambda, Concatenate, Dropout, Conv2DTranspose
from keras import layers
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger
from keras.layers.merge import concatenate
from PIL import Image
from keras.optimizers import Adam
import tensorflow as tf
from keras.applications.resnet50 import ResNet50

import matplotlib as mpl
import seaborn as sns

import os
import random
import numpy as np
import cv2
import matplotlib.pyplot as plt
import shutil
import pandas as pd
import time
from tqdm import *

# Fix the NumPy random seed and set CUDA_VISIBLE_DEVICES to "0" for this process.
seed = 7
np.random.seed(seed)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Data loading

# Unpack the dataset archive from Google Drive unless ./edge_clip/ already exists.
# NOTE: the `!unzip` line is an IPython/Colab shell escape, so this cell only
# runs inside a notebook, not as a plain Python script.
if not os.path.exists('./edge_clip/'):
  !unzip /content/drive/MyDrive/edge_clip.zip -d ./
# Image width, image height and number of label channels.
# NOTE(review): these three names are not referenced by the code visible in
# this file — confirm whether anything else uses them.
img_w = 512
img_h = 512
n_label = 1

# The two label values; presumably 0 = background and 1 = foreground — TODO confirm.
classes = [0., 1.]

# Label encoder fitted on the two label values above.
labelencoder = LabelEncoder()
labelencoder.fit(classes)

# File names found in the training source-image directory.
# NOTE(review): this absolute /content/... path matches the relative ./edge_clip/
# check above only when the working directory is /content — confirm.
image_sets = os.listdir('/content/edge_clip/train/src/')


def load_img(path, grayscale=False):
    """Read an image with OpenCV and divide its pixel values by 255.

    Args:
        path: Path of the image file to read.
        grayscale: If True, decode with ``cv2.IMREAD_GRAYSCALE``; otherwise
            use OpenCV's default colour decoding.

    Returns:
        A float NumPy array containing the decoded pixels divided by 255.0.

    Raises:
        OSError: If OpenCV cannot read ``path``. ``cv2.imread`` signals a
            missing or undecodable file by returning ``None``; without this
            check the colour path silently produced a NaN scalar and the
            grayscale path failed with an unrelated ``TypeError``.
    """
    # IMREAD_COLOR is the flag cv2.imread uses when none is given.
    flag = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
    img = cv2.imread(path, flag)
    if img is None:
        raise OSError(f"cannot read image (missing or unreadable): {path!r}")
    return np.array(img, dtype="float") / 255.0


filepath = '/content/edge_clip/train/'


def get_train_val(val_rate=0.2):
    """Split the file names under ``filepath + 'src/'`` into train/validation lists.

    The names are sorted, shuffled with a fixed seed (14) and then split: the
    first ``int(val_rate * total)`` names form the validation set and the
    rest form the training set.

    Args:
        val_rate: Fraction of the files assigned to the validation set.

    Returns:
        A ``(train_set, val_set)`` tuple of lists of file names.

    Note:
        This reseeds Python's global ``random`` module as a side effect.
    """
    # os.listdir returns entries in arbitrary, filesystem-dependent order;
    # sorting first makes the seeded shuffle (and thus the split) reproducible.
    names = sorted(os.listdir(filepath + 'src/'))

    random.seed(14)
    random.shuffle(names)

    val_num = int(val_rate * len(names))
    return names[val_num:], names[:val_num]


# data for training
def generateData(batch_size, data=None):
    """Yield ``(images, labels)`` batches forever, cycling over ``data``.

    For every file name in ``data`` the image is loaded from
    ``filepath + 'src/'`` and its label from ``filepath + 'label/'``
    (grayscale); both go through ``img_to_array``. A batch is emitted each
    time ``batch_size`` samples have been collected. Samples left over at the
    end of a pass (fewer than ``batch_size``) are discarded before the next
    pass starts.

    Args:
        batch_size: Number of samples per yielded batch; must be positive.
        data: List of image file names. It must hold at least ``batch_size``
            entries.

    Yields:
        Tuples ``(np.ndarray, np.ndarray)`` of stacked images and labels.

    Raises:
        ValueError: On the first ``next()`` call if ``batch_size`` is not
            positive or ``data`` has fewer than ``batch_size`` entries. In the
            latter case the generator used to spin forever without yielding.
    """
    # A None default replaces the original mutable `data=[]` default.
    names = [] if data is None else data
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if len(names) < batch_size:
        raise ValueError(
            f"need at least batch_size={batch_size} samples, got {len(names)}")

    while True:
        images, labels = [], []
        for url in names:
            images.append(img_to_array(load_img(filepath + 'src/' + url)))
            labels.append(img_to_array(
                load_img(filepath + 'label/' + url, grayscale=True)))
            if len(images) == batch_size:
                yield (np.array(images), np.array(labels))
                images, labels = [], []

            # data for validation


def generateValidData(batch_size, data=None):
    """Yield ``(images, labels)`` validation batches forever.

    Each file name in ``data`` is loaded twice: the image from
    ``filepath + 'src/'`` and the grayscale label from
    ``filepath + 'label/'``; both are passed through ``img_to_array``.
    Whenever ``batch_size`` samples have accumulated they are stacked and
    yielded. A trailing partial batch at the end of a pass is dropped.

    Args:
        batch_size: Number of samples per yielded batch; must be positive.
        data: List of image file names with at least ``batch_size`` entries.

    Yields:
        Tuples ``(np.ndarray, np.ndarray)`` of stacked images and labels.

    Raises:
        ValueError: On the first ``next()`` call if ``batch_size`` is not
            positive or ``data`` is too short to ever fill one batch.
            Previously that case looped forever without yielding.
    """
    # Avoid a shared mutable default list; None stands for "no data".
    names = [] if data is None else data
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if len(names) < batch_size:
        raise ValueError(
            f"need at least batch_size={batch_size} samples, got {len(names)}")

    while True:
        valid_data = []
        valid_label = []
        for url in names:
            img = img_to_array(load_img(filepath + 'src/' + url))
            label = img_to_array(
                load_img(filepath + 'label/' + url, grayscale=True))
            valid_data.append(img)
            valid_label.append(label)
            if len(valid_data) == batch_size:
                yield (np.array(valid_data), np.array(valid_label))
                valid_data = []
                valid_label = []
# Split the file names into training and validation lists (default val_rate=0.2).
train_set, val_set = get_train_val()
# Bare expression: displays the two split sizes when run as a notebook cell.
len(train_set), len(val_set)
(8320, 2080)

Build model

def Upsample(tensor, size):
    """Resize a feature map to ``size`` with bilinear interpolation.

    Args:
        tensor: Keras tensor to resize. The part of its name before the
            first ``/`` is used to build the layer name.
        size: ``[height, width]`` target size handed to ``tf.image.resize``.

    Returns:
        The output tensor of a ``Lambda`` layer that performs the resize.
    """
    name = tensor.name.split('/')[0] + '_upsample'

    def bilinear_upsample(x):
        # 'bilinear' is also tf.image.resize's default; spelled out for clarity.
        return tf.image.resize(images=x, size=size, method='bilinear')

    # No output_shape is passed: the original gave `size` (height, width only),
    # which declares a shape without the channel axis. Leaving it out lets
    # Keras infer the real output shape from the function.
    return Lambda(bilinear_upsample, name=name)(tensor)


def ASPP(tensor):
    """Atrous spatial pyramid pooling over a feature map.

    Builds five parallel branches on ``tensor`` -- an image-level pooling
    branch, a 1x1 convolution and three 3x3 convolutions with dilation rates
    6, 12 and 18 -- concatenates them and fuses the result with a final 1x1
    convolution. Every convolution has 256 filters and is followed by batch
    normalisation and ReLU.

    Args:
        tensor: Feature-map tensor. ``K.int_shape(tensor)[1:3]`` is used as
            the pooling window, so its spatial size must be statically known
            (and larger than 1 for the transposed convolution below to get a
            positive dilation rate).

    Returns:
        The fused 256-channel feature tensor.
    """
    # Imported locally: the file's top-level keras.layers imports do not
    # include AveragePooling2D, so the original raised NameError here.
    from keras.layers import AveragePooling2D

    dims = K.int_shape(tensor)

    def conv_bn_relu(inputs, kernel_size, dilation_rate, conv_name, suffix):
        """256-filter bias-free convolution followed by BatchNorm and ReLU."""
        out = Conv2D(filters=256, kernel_size=kernel_size,
                     dilation_rate=dilation_rate, padding='same',
                     kernel_initializer='he_normal', name=conv_name,
                     use_bias=False)(inputs)
        out = BatchNormalization(name='bn_' + suffix)(out)
        return Activation('relu', name='relu_' + suffix)(out)

    # Image-level branch: pool the whole map down to 1x1, then project.
    y_pool = AveragePooling2D(pool_size=(
        dims[1], dims[2]), name='average_pooling')(tensor)
    y_pool = conv_bn_relu(y_pool, 1, 1, 'pool_1x1conv2d', '1')

    # Bring the 1x1 map back to (dims[1], dims[2]). A stride-1, 2x2 transposed
    # convolution with dilation d turns length 1 into 1 + d, so d = dim - 1.
    # The original hard-coded 512 // 16 - 1, which is only right for a 512-px
    # input; deriving it from the tensor shape gives 31 in that case too.
    # (This layer replaced a bilinear Upsample call to the same size.)
    y_pool = Conv2DTranspose(filters=256, kernel_size=(2, 2),
                             kernel_initializer='he_normal',
                             dilation_rate=(dims[1] - 1, dims[2] - 1))(y_pool)

    y_1 = conv_bn_relu(tensor, 1, 1, 'ASPP_conv2d_d1', '2')
    y_6 = conv_bn_relu(tensor, 3, 6, 'ASPP_conv2d_d6', '3')
    y_12 = conv_bn_relu(tensor, 3, 12, 'ASPP_conv2d_d12', '4')
    y_18 = conv_bn_relu(tensor, 3, 18, 'ASPP_conv2d_d18', '5')

    y = concatenate([y_pool, y_1, y_6, y_12, y_18], name='ASPP_concat')

    return conv_bn_relu(y, 1, 1, 'ASPP_conv2d_final', 'final')


def DeepLabV3Plus(img_height=512, img_width=512, nclasses=1):
    """Build a DeepLabv3+ style segmentation model on a ResNet50 backbone.

    The encoder applies ``ASPP`` to the backbone's ``conv4_block6_out``
    features; the decoder enlarges them, concatenates them with a 48-channel
    projection of ``conv2_block3_out``, refines the result with two 3x3
    convolutions, enlarges it to the input size and applies a 1x1
    convolution with a sigmoid activation.

    Args:
        img_height: Input image height. The dilation arithmetic below assumes
            the two backbone taps are at 1/16 and 1/4 of the input size, so
            this should be a multiple of 16 -- TODO confirm for other sizes.
        img_width: Input image width; same assumption as ``img_height``.
        nclasses: Number of channels produced by the output convolution.

    Returns:
        A Keras ``Model`` named ``'DeepLabV3_Plus'``. Constructing the
        backbone with ``weights='imagenet'`` loads the ImageNet weights.
    """
    print('*** Building DeepLabv3Plus Network ***')

    base_model = ResNet50(input_shape=(
        img_height, img_width, 3), weights='imagenet', include_top=False)

    image_features = base_model.get_layer('conv4_block6_out').output
    x_a = ASPP(image_features)
    # Enlarge from 1/16 to 1/4 of the input size (this replaced a bilinear
    # Upsample call). A stride-1, 2x2 transposed convolution with dilation d
    # adds d pixels, so each axis needs 3/16 of its own input length. The
    # original used img_height for both axes, which broke non-square inputs.
    # NOTE(review): unlike the bilinear resize, this layer does not
    # interpolate -- confirm that this is the intended design.
    x_a = Conv2DTranspose(filters=256, kernel_size=(2, 2),
                          kernel_initializer='he_normal',
                          dilation_rate=(img_height // 16 * 3,
                                         img_width // 16 * 3))(x_a)

    # Low-level features, projected down to 48 channels.
    x_b = base_model.get_layer('conv2_block3_out').output
    x_b = Conv2D(filters=48, kernel_size=1, padding='same',
                 kernel_initializer='he_normal', name='low_level_projection', use_bias=False)(x_b)
    x_b = BatchNormalization(name='bn_low_level_projection')(x_b)
    x_b = Activation('relu', name='low_level_activation')(x_b)

    x = concatenate([x_a, x_b], name='decoder_concat')

    # NOTE(review): these two convolutions apply ReLU before BatchNorm
    # (activation='relu') and again after it, unlike the Conv-BN-ReLU pattern
    # used elsewhere in this file. Kept as is to preserve the model's behaviour.
    x = Conv2D(filters=256, kernel_size=3, padding='same', activation='relu',
               kernel_initializer='he_normal', name='decoder_conv2d_1', use_bias=False)(x)
    x = BatchNormalization(name='bn_decoder_1')(x)
    x = Activation('relu', name='activation_decoder_1')(x)

    x = Conv2D(filters=256, kernel_size=3, padding='same', activation='relu',
               kernel_initializer='he_normal', name='decoder_conv2d_2', use_bias=False)(x)
    x = BatchNormalization(name='bn_decoder_2')(x)
    x = Activation('relu', name='activation_decoder_2')(x)

    # Enlarge from 1/4 of the input size to the full size (this replaced a
    # bilinear Upsample call); dilation is again computed per axis.
    x = Conv2DTranspose(filters=256, kernel_size=(2, 2),
                        kernel_initializer='he_normal',
                        dilation_rate=(img_height // 4 * 3,
                                       img_width // 4 * 3))(x)

    x = Conv2D(nclasses, (1, 1), name='output_layer')(x)
    # The original carried a note here suggesting Activation('softmax') as an
    # alternative to the sigmoid. If tf.losses.SparseCategoricalCrossentropy
    # is used, its from_logits flag says whether y_pred holds logits; by
    # default y_pred is assumed to encode a probability distribution.
    x = Activation('sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=x, name='DeepLabV3_Plus')
    print(f'*** Output_Shape => {model.output_shape} ***')
    return model
# Build the model with one output channel and the default 512x512 input size.
model = DeepLabV3Plus(nclasses=1)
*** Building DeepLabv3Plus Network ***
*** Output_Shape => (None, 512, 512, 1) ***
def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient, averaged over the batch axis.

    Sums are taken over axes 1 and 2 of both tensors; the per-sample ratio
    ``(2 * intersection + smooth) / (sum(y_true) + sum(y_pred) + smooth)``
    is then averaged over axis 0.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, same shape as ``y_true``.
        smooth: Constant added to numerator and denominator.

    Returns:
        The mean of the per-sample Dice scores along axis 0.
    """
    spatial_axes = [1, 2]
    overlap = K.sum(y_true * y_pred, axis=spatial_axes)
    total = K.sum(y_true, axis=spatial_axes) + K.sum(y_pred, axis=spatial_axes)
    score = (2. * overlap + smooth) / (total + smooth)
    return K.mean(score, axis=0)

def dice_coef_loss(y_true, y_pred):
    """Dice loss: one minus the smoothed Dice coefficient (``smooth=1``).

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, same shape as ``y_true``.

    Returns:
        ``1 - dice_coef(y_true, y_pred, smooth=1)``.
    """
    # The original computed this expression but never returned it, so the
    # function always yielded None.
    return 1 - dice_coef(y_true, y_pred, smooth=1)

def bce_logdice_loss(y_true, y_pred):
    """Binary cross-entropy minus the log of the Dice coefficient.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor, same shape as ``y_true``.

    Returns:
        ``binary_crossentropy(y_true, y_pred) - log(dice_coef(y_true, y_pred))``.
    """
    # The original evaluated log(1 - dice_loss(...)) with an undefined name
    # `dice_loss` (NameError). One minus the Dice loss is the Dice coefficient
    # itself, so dice_coef (default smooth=1) is used directly.
    return binary_crossentropy(y_true, y_pred) - K.log(dice_coef(y_true, y_pred))

# Compile with Adam (learning rate 1e-4), binary cross-entropy and accuracy.
# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): the original passed loss=['binary_crossentropy', bce_logdice_loss].
# A loss list is matched one-to-one with model outputs and this model has a
# single output, so the second entry does not fit; the first entry is kept
# here. Confirm which loss is actually intended.
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

model training

EPOCHS = 5
BS = 4

model_path = '/content/drive/MyDrive/models/deeplab_v3.h5'

## Callbacks
# Append the per-epoch training log to a CSV file.
csvlogger = CSVLogger('/content/drive/MyDrive/training_deeplab_v3.csv', separator=',', append=True)
# Multiply the learning rate by 0.1 after 2 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1)
# Save the full model (not weights only) whenever val_accuracy improves.
# The deprecated `period=1` argument is dropped: saving once per epoch is
# already the default, and passing it only triggered a deprecation warning.
checkpoint = ModelCheckpoint(model_path, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
# Stop training after 4 epochs without val_accuracy improvement.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=4, verbose=1, mode='auto')


train_numb = len(train_set)
valid_numb = len(val_set)
print("the number of train data is", train_numb)
print("the number of val data is", valid_numb)
H = model.fit(
    generateData(BS, train_set),
    steps_per_epoch=train_numb // BS,
    epochs=EPOCHS, verbose=1,
    # Use the dedicated validation generator defined above for val_set.
    validation_data=generateValidData(BS, val_set),
    validation_steps=valid_numb // BS,
    callbacks=[checkpoint, early_stopping, reduce_lr, csvlogger],
    max_queue_size=BS
    )
WARNING:tensorflow:`period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen.
the number of train data is 8320
the number of val data is 2080
Epoch 1/5
  75/2080 [>.............................] - ETA: 48:22 - loss: 0.6673 - accuracy: 0.7521

Summary and reflections

  1. Saving the model: small models are usually saved with both their architecture and their weights, whereas for large models only the weights are saved. Here, when the upsampling is implemented with a Lambda layer, the model architecture cannot be saved. In that case, save only the weights, then define the model structure in code and load the weights into it;
  2. Training time: DeepLab v3+ takes very long to train, roughly 10 times longer than U-Net. My guess is that the upsampling and the atrous convolutions are the time-consuming parts, but this still needs to be verified. If you know the reason, please let me know;

Keywords: Python AI neural networks TensorFlow Deep Learning

Added by phorman on Mon, 31 Jan 2022 09:09:55 +0200