Learning Python -- building VGG

VGG

VGG is a classic convolutional neural network built mainly from convolution layers, batch normalization, activation functions, pooling layers, and fully connected layers. The repeated convolution > batch normalization > activation pattern forms the main body of the network, the pooling layers downsample the feature maps, and the fully connected layers at the end produce the classification output, as sketched below.
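
As a minimal sketch (assuming a 256x256 RGB input, the size used later in this post), a single convolution > batch normalization > activation block followed by pooling behaves like this:

import torch
import torch.nn as nn

block = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),  # padding=1 keeps 256x256
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),                   # halves the spatial size to 128x128
)

x = torch.randn(1, 3, 256, 256)
print(block(x).shape)  # torch.Size([1, 64, 128, 128])
# Five such pooling stages reduce 256 -> 128 -> 64 -> 32 -> 16 -> 8,
# which is why the first fully connected layer below expects 512 * 8 * 8 features.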

import torch
import torch.nn as nn

cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.fc1 = nn.Linear(512 * 8 * 8, 512 * 4)  # a 256x256 input is pooled down to 8x8 feature maps
        self.bn_d1 = self._linear_layers()           # dropout + ReLU after fc1
        self.fc2 = nn.Linear(512 * 4, 128)
        self.bn_d2 = self._linear_layers()           # dropout + ReLU after fc2
        self.fc3 = nn.Linear(128, 2)                 # two output classes

    @staticmethod
    def _linear_layers():
        layers = []
        layers.append(torch.nn.Dropout(0.5))
        layers.append(torch.nn.ReLU())
        return nn.Sequential(*layers)

    @staticmethod
    def _make_layers(structure):
        layers = []
        in_channels = 3
        for out_channels in structure:
            if out_channels == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                     padding=1, bias=False),
                           nn.BatchNorm2d(out_channels),
                           nn.ReLU(inplace=True)]
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.bn_d1(out)
        out = self.fc2(out)
        out = self.bn_d2(out)
        out = self.fc3(out)
        return out
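
As a quick sanity check of the model defined above, a random 256x256 batch should come out as a (batch_size, 2) tensor of class scores:

model = VGG('VGG19')
dummy = torch.randn(2, 3, 256, 256)   # a random batch of two 256x256 RGB images
print(model(dummy).shape)             # expected: torch.Size([2, 2])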

train

Here the training images are read with OpenCV. OpenCV and PIL decode images differently (OpenCV returns the channels in BGR order, PIL in RGB), so the pixel values of the same image differ considerably between the two libraries. Whichever library is used to read images during training must therefore also be used when the model is applied later, as illustrated below.
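
For example, the same file read with both libraries only matches after the channel axis is reversed (the file name here is just a placeholder):

import cv2
import numpy as np
from PIL import Image

path = 'example.jpg'                              # placeholder image path
bgr = cv2.imread(path)                            # H x W x 3 array in BGR order
rgb = np.array(Image.open(path).convert('RGB'))   # H x W x 3 array in RGB order
# Reversing the channel axis of the OpenCV array usually recovers the PIL result
# (JPEG decoders can still differ slightly between the two libraries).
print(np.array_equal(bgr[:, :, ::-1], rgb))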

import torch
import cv2
import torch.nn as nn
import torch.utils.data
from PIL import Image
import torchvision
import torchvision.transforms as transforms
from model.VggNet import VGG


# Hyperparameters
EPOCH = 30
BATCH_SIZE = 10
LR = 0.001

IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')


def default_loader(path):
    # Read with OpenCV (BGR) and wrap in a PIL image so torchvision transforms can be applied
    img = Image.fromarray(cv2.imread(path))
    return img


def load_train_data():
    path = "../medias/fire_pictures/val"  # Route
    train_set = torchvision.datasets.ImageFolder(path,
                                                 transform=transforms.Compose([
                                                     transforms.Resize((256, 256)),
                                                     transforms.ToTensor()]),
                                                 loader=default_loader
                                                 )

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    return train_loader


def load_test_data():
    path = "../medias/fire_pictures/test"
    test_set = torchvision.datasets.ImageFolder(path,
                                                transform=transforms.Compose([
                                                    transforms.Resize((256, 256)),
                                                    transforms.ToTensor()]),
                                                loader=default_loader
                                                )
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)
    return test_loader


def train(filename, model):
    train_loader = load_train_data()
    if torch.cuda.is_available():
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):
            if torch.cuda.is_available():
                b_x = b_x.cuda()
                b_y = b_y.cuda()
            output = model(b_x)
            loss = loss_func(output, b_y)
            optimizer.zero_grad()  # clear the gradients from the previous step
            loss.backward()        # back-propagate the loss
            optimizer.step()       # update the weights

            if step % 50 == 0:
                print('Epoch:', epoch, '| step:', step, '| loss: %.4f' % loss.item())

    cnn = model.cpu()
    torch.save(cnn, '../network/' + filename)  # save the whole model


def test(filename):
    cnn = torch.load('../network/' + filename).cuda()  # load from the folder used by train()
    cnn.eval()
    test_loader = load_test_data()
    images, labels = next(iter(test_loader))
    with torch.no_grad():
        outputs = cnn(images.cuda())
    predicted = torch.softmax(outputs, 1)
    p = torch.max(predicted, 1)  # (max probability, predicted class index)
    print(p[1].cpu().numpy(), labels.numpy())
    correct = 0
    for i in range(len(labels)):
        if int(p[1][i]) == int(labels[i]):
            correct += 1
    print(correct / len(labels))  # accuracy over this batch


if __name__ == '__main__':
    vgg = VGG("vgg19")
    train("vgg19_fire_256.pkl", cnn)

