The classic CNN network VGG, implemented on the CIFAR-10 dataset

🙋‍♂️ Classmate Zhang (zhangruiyuan@zju.edu.cn). Please contact me if you have any questions~

0. Background introduction

I'm doomed not to sleep well tonight.

I want to build a CNN by myself to deepen my understanding. While studying deep learning some time ago, I also heard other uploaders talk about VGG. This network is relatively simple, so I want to implement it with PyTorch.

The dataset used in this post is CIFAR-10, which contains 10 classes. Each class has 6,000 color images of size 32 x 32; of the 60,000 images in total, 50,000 are used for training and 10,000 for testing.

The structure of this post is as follows: the loading, visualization, and splitting of the CIFAR-10 dataset, as well as model evaluation, live in the code of this chapter. The network models implemented in Chapters 1, 2, and 3 need to run together with this chapter's base code. The base code is not the focus of this article, so I won't go over it in detail here.

import torch
import torchvision
import torchvision.transforms as transforms


# 1, Download the data locally
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./datasets/5f6b1577787e9d5bb70800a4-momodel', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./datasets/5f6b1577787e9d5bb70800a4-momodel', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# 2, Image display
import matplotlib.pyplot as plt
import numpy as np

# Helper function to display an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

# Get a random batch of training data
dataiter = iter(trainloader)
images, labels = next(dataiter)  # .next() was removed in newer PyTorch; use the built-in next()

# Display the first four images of the batch
imshow(torchvision.utils.make_grid(images[:4]))
# Display the corresponding labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))


# 3, Network structure (replace this section with the code from Chapter 1, 2, or 3 of this post)
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # The input shape is 3 * 32 * 32
        self.c1 = nn.Conv2d(3, 6, 5, padding=1)  # 6*30*30 (5x5 kernel, padding 1: 32-5+2+1)
        self.b1 = nn.BatchNorm2d(6)              # 6*30*30
        self.a1 = nn.ReLU()                      # 6*30*30
        self.p1 = nn.MaxPool2d(2,2,padding=1)    # 6*16*16 ((30+2)/2)
        self.d1 = nn.Dropout(p=0.2)              # 6*16*16
        
        self.flatten = nn.Flatten()              # 6*16*16 = 1536
        self.f1= nn.Linear(1536,128)             # 128
        self.a2= nn.ReLU()                       # 128
        self.f2= nn.Linear(128,10)               # 10
        self.a3= nn.Softmax(dim=1)               # 10; note: redundant with CrossEntropyLoss, which applies log-softmax itself
        
        

    def forward(self, x):
        
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.d1(x)
        
        x = self.flatten(x)
        x = self.f1(x)
        x = self.a2(x)
        x = self.f2(x)
        x = self.a3(x)
    
        return x
    
    
    # Optional helper (unused above): count the features per sample,
    # i.e. the product of all dimensions except the batch dimension
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    

net = Net()
print(net)

# 4, Model training
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# print(device)
net.to(device)

import torch.optim as optim

criterion = nn.CrossEntropyLoss()  # expects raw logits: it applies log-softmax internally
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.8)


print('begin')
for epoch in range(5):  # Loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Get input
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradient set to 0
        optimizer.zero_grad()

        # Forward propagation, back propagation, optimization
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print status information
        running_loss += loss.item()
        if i % 500 == 499:    # Print every 500 mini-batches; consider a larger interval when training on GPU
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 500))
            running_loss = 0.0
            

print('Finished Training')
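
# (Optional, my own addition, not in the original post) It is convenient to save
# the trained weights so evaluation can be re-run without retraining; a minimal
# sketch, where './vgg_cifar10.pth' is just an example path:
torch.save(net.state_dict(), './vgg_cifar10.pth')
# To restore later: rebuild the architecture, then load the weights
# net = Net(); net.load_state_dict(torch.load('./vgg_cifar10.pth')); net.eval()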


# 5, Model accuracy calculation
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
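
# (Optional extension, not in the original post) Per-class accuracy often shows
# which categories the network confuses; a minimal sketch reusing the same testloader:
class_correct = [0] * 10
class_total = [0] * 10
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        for label, pred in zip(labels, predicted):
            l = label.item()
            class_total[l] += 1
            class_correct[l] += int(pred.item() == l)

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / max(class_total[i], 1)))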

1, Use only nn.XXX to build the network structure

import torch.nn as nn
import torch.nn.functional as F

# VGG implementation
# Printing the model built in this style shows every layer in full detail
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # The input shape is 3 * 32 * 32
        self.c1 = nn.Conv2d(3, 64, 3, padding=1)  
        self.b1 = nn.BatchNorm2d(64)  
        self.a1 = nn.ReLU()  
        self.c2 = nn.Conv2d(64, 64, 3, padding=1)
        self.b2 = nn.BatchNorm2d(64)  
        self.a2 = nn.ReLU()  
        self.p1 = nn.MaxPool2d(2)

        self.c3 = nn.Conv2d(64, 128, 3, padding=1)
        self.b3 = nn.BatchNorm2d(128)
        self.a3 = nn.ReLU()  
        self.c4 = nn.Conv2d(128, 128, 3, padding=1)
        self.b4 = nn.BatchNorm2d(128) 
        self.a4 = nn.ReLU()  
        self.p2 = nn.MaxPool2d(2)

        self.c5 = nn.Conv2d(128, 256, 3, padding=1)
        self.b5 = nn.BatchNorm2d(256)  
        self.a5 = nn.ReLU()  
        self.c6 = nn.Conv2d(256, 256, 3, padding=1)
        self.b6 = nn.BatchNorm2d(256)  
        self.a6 = nn.ReLU()  
        self.c7 = nn.Conv2d(256, 256, 3, padding=1)
        self.b7 = nn.BatchNorm2d(256)
        self.a7 = nn.ReLU()
        self.p3 = nn.MaxPool2d(2)

        self.c8 = nn.Conv2d(256, 512, 3, padding=1)
        self.b8 = nn.BatchNorm2d(512)  
        self.a8 = nn.ReLU()  
        self.c9 = nn.Conv2d(512, 512, 3, padding=1)
        self.b9 = nn.BatchNorm2d(512)  
        self.a9 = nn.ReLU()  
        self.c10 = nn.Conv2d(512, 512, 3, padding=1)
        self.b10 = nn.BatchNorm2d(512)
        self.a10 = nn.ReLU()
        self.p4 = nn.MaxPool2d(2)

        self.c11 = nn.Conv2d(512, 512, 3, padding=1)
        self.b11 = nn.BatchNorm2d(512)  
        self.a11 = nn.ReLU()  
        self.c12 = nn.Conv2d(512, 512, 3, padding=1)
        self.b12 = nn.BatchNorm2d(512)  
        self.a12 = nn.ReLU()  
        self.c13 = nn.Conv2d(512, 512, 3, padding=1)
        self.b13 = nn.BatchNorm2d(512)
        self.a13 = nn.ReLU() 
        self.p5 = nn.MaxPool2d(2)

        self.flatten = nn.Flatten()  # 512*1*1 = 512 after five 2x2 poolings of the 32x32 input
        
        self.f1 = nn.Linear(512, 512)
        self.a14 = nn.ReLU()

        self.f2 = nn.Linear(512, 512)
        self.a15 = nn.ReLU()

        self.f3 = nn.Linear(512, 10)
        self.a16 = nn.Softmax(dim=1)  # note: redundant with CrossEntropyLoss, which applies log-softmax itself
        
        

    def forward(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p1(x)

        x = self.c3(x)
        x = self.b3(x)
        x = self.a3(x)
        x = self.c4(x)
        x = self.b4(x)
        x = self.a4(x)
        x = self.p2(x)

        x = self.c5(x)
        x = self.b5(x)
        x = self.a5(x)
        x = self.c6(x)
        x = self.b6(x)
        x = self.a6(x)
        x = self.c7(x)
        x = self.b7(x)
        x = self.a7(x)
        x = self.p3(x)

        x = self.c8(x)
        x = self.b8(x)
        x = self.a8(x)
        x = self.c9(x)
        x = self.b9(x)
        x = self.a9(x)
        x = self.c10(x)
        x = self.b10(x)
        x = self.a10(x)
        x = self.p4(x)

        x = self.c11(x)
        x = self.b11(x)
        x = self.a11(x)
        x = self.c12(x)
        x = self.b12(x)
        x = self.a12(x)
        x = self.c13(x)
        x = self.b13(x)
        x = self.a13(x)
        x = self.p5(x)

        x = self.flatten(x)
        
        x = self.f1(x)
        x = self.a14(x)

        x = self.f2(x)
        x = self.a15(x)

        x = self.f3(x)
        x = self.a16(x)
        return x
    
net = Net()
print(net)
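
# (My own sanity check, not in the original post) Counting the trainable
# parameters is a quick way to confirm the network was assembled as intended:
num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print('Trainable parameters: %.1fM' % (num_params / 1e6))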

2, Use the method officially recommended by PyTorch

Official recommendations are as follows:

  1. In __init__, initialize the trainable model components, such as Conv2d, BatchNorm2d, Linear, etc., i.e. the classes represented by nn.XXX.
  2. Components without trainable parameters, such as activation functions and pooling operations, i.e. the functions represented by nn.functional.XXX, need not be initialized in __init__; just call them in forward (a minimal sketch of this contrast follows below).
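Here is a minimal sketch of that contrast (the class TinyNet below is my own illustrative example, not part of the original post):

import torch.nn as nn
import torch.nn.functional as F

class TinyNet(nn.Module):
    def __init__(self):
        super(TinyNet, self).__init__()
        self.conv = nn.Conv2d(3, 8, 3, padding=1)  # trainable: declared in __init__
        self.bn = nn.BatchNorm2d(8)                # trainable: declared in __init__

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = F.relu(x)              # parameter-free: called directly in forward
        return F.max_pool2d(x, 2)  # parameter-free: called directly in forward

With that convention in mind, the full VGG looks like this: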
import torch.nn as nn
import torch.nn.functional as F

# VGG implementation
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # The input shape is 3 * 32 * 32
        self.c1 = nn.Conv2d(3, 64, 3, padding=1)  # Convolution layer 1
        self.b1 = nn.BatchNorm2d(64)  # BN layer 1
        self.c2 = nn.Conv2d(64, 64, 3, padding=1)
        self.b2 = nn.BatchNorm2d(64)  # BN layer 2

        self.c3 = nn.Conv2d(64, 128, 3, padding=1)
        self.b3 = nn.BatchNorm2d(128)  # BN layer 3
        self.c4 = nn.Conv2d(128, 128, 3, padding=1)
        self.b4 = nn.BatchNorm2d(128)  # BN layer 4

        self.c5 = nn.Conv2d(128, 256, 3, padding=1)
        self.b5 = nn.BatchNorm2d(256)  # BN layer 5
        self.c6 = nn.Conv2d(256, 256, 3, padding=1)
        self.b6 = nn.BatchNorm2d(256)  # BN layer 6
        self.c7 = nn.Conv2d(256, 256, 3, padding=1)
        self.b7 = nn.BatchNorm2d(256)

        self.c8 = nn.Conv2d(256, 512, 3, padding=1)
        self.b8 = nn.BatchNorm2d(512)  # BN layer 8
        self.c9 = nn.Conv2d(512, 512, 3, padding=1)
        self.b9 = nn.BatchNorm2d(512)  # BN layer 9
        self.c10 = nn.Conv2d(512, 512, 3, padding=1)
        self.b10 = nn.BatchNorm2d(512)

        self.c11 = nn.Conv2d(512, 512, 3, padding=1)
        self.b11 = nn.BatchNorm2d(512)  # BN layer 11
        self.c12 = nn.Conv2d(512, 512, 3, padding=1)
        self.b12 = nn.BatchNorm2d(512)  # BN layer 12
        self.c13 = nn.Conv2d(512, 512, 3, padding=1)
        self.b13 = nn.BatchNorm2d(512)

        self.flatten = nn.Flatten() 
        self.f1 = nn.Linear(512,512)
        self.f2 = nn.Linear(512,512)
        self.f3 = nn.Linear(512,10)
        

    def forward(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = F.relu(x)
        x = self.c2(x)
        x = self.b2(x)
        x = F.relu(x)
        x = F.max_pool2d(x,2)

        x = self.c3(x)
        x = self.b3(x)
        x = F.relu(x)
        x = self.c4(x)
        x = self.b4(x)
        x = F.relu(x)
        x = F.max_pool2d(x,2)
        
        x = self.c5(x)
        x = self.b5(x)
        x = F.relu(x)
        x = self.c6(x)
        x = self.b6(x)
        x = F.relu(x)
        x = self.c7(x)
        x = self.b7(x)
        x = F.relu(x)
        x = F.max_pool2d(x,2)

        x = self.c8(x)
        x = self.b8(x)
        x = F.relu(x)
        x = self.c9(x)
        x = self.b9(x)
        x = F.relu(x)
        x = self.c10(x)
        x = self.b10(x)
        x = F.relu(x)
        x = F.max_pool2d(x,2)

        x = self.c11(x)
        x = self.b11(x)
        x = F.relu(x)
        x = self.c12(x)
        x = self.b12(x)
        x = F.relu(x)
        x = self.c13(x)
        x = self.b13(x)
        x = F.relu(x)
        x = F.max_pool2d(x,2)

        x = self.flatten(x)
        
        x = self.f1(x)
        x = F.relu(x)
        
        x = self.f2(x)
        x = F.relu(x)
        
        x = self.f3(x)
        x = F.softmax(x, dim=1)  # dim must be given explicitly; redundant with CrossEntropyLoss
        
        return x
    
net = Net()
print(net)

3, Use nn.Sequential to simplify the code (recommended)

To be honest, I think the first two styles are easier for novices to understand. Once you understand the network structure, the code in this section helps tidy up the code's organization. My subjective feeling is that if a class were taught only in this compact form, it might be more concise, but it would not be conducive to students' understanding.

import torch.nn as nn
import torch.nn.functional as F

class VGG(nn.Module):
    """
    VGG builder
    """
    def __init__(self, arch, num_classes=1000):
        super(VGG, self).__init__()
        self.in_channels = 3
        self.conv3_64 = self.__make_layer(64, arch[0])
        self.conv3_128 = self.__make_layer(128, arch[1])
        self.conv3_256 = self.__make_layer(256, arch[2])
        self.conv3_512a = self.__make_layer(512, arch[3])
        self.conv3_512b = self.__make_layer(512, arch[4])
        self.fc1 = nn.Linear(512, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def __make_layer(self, channels, num):
        layers = []
        for i in range(num):
            layers.append(nn.Conv2d(self.in_channels, channels, 3, stride=1, padding=1, bias=False))  # same padding
            layers.append(nn.BatchNorm2d(channels))
            layers.append(nn.ReLU())
            self.in_channels = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv3_64(x)
        out = F.max_pool2d(out, 2)
        out = self.conv3_128(out)
        out = F.max_pool2d(out, 2)
        out = self.conv3_256(out)
        out = F.max_pool2d(out, 2)
        out = self.conv3_512a(out)
        out = F.max_pool2d(out, 2)
        out = self.conv3_512b(out)
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.fc2(out)
        out = self.bn2(out)
        out = F.relu(out)
        return F.softmax(self.fc3(out), dim=1)  # dim must be explicit; redundant with CrossEntropyLoss

def VGG_11():
    return VGG([1, 1, 2, 2, 2], num_classes=10)

def VGG_13():
    return VGG([2, 2, 2, 2, 2], num_classes=10)

def VGG_16():
    return VGG([2, 2, 3, 3, 3], num_classes=10)

def VGG_19():
    return VGG([2, 2, 4, 4, 4], num_classes=10)

net = VGG_16()
print(net)
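
# (My own quick check, not in the original post) Push a dummy batch through the
# network to confirm the output shape; for CIFAR-10 it should be (batch, 10):
import torch

dummy = torch.randn(2, 3, 32, 32)  # two fake 32x32 RGB images
print(net(dummy).shape)            # expected: torch.Size([2, 10])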

Conclusion: one last story (can be skipped)

I ran into the silliest possible bug during implementation. In the forward function inherited from nn.Module, I used x to carry my intermediate results, but at the last step, for whatever reason, I used y for the return value. Originally that happened to work, but then I changed the network structure and mixed the names up. The end result was that y had passed through only one fully connected layer and one ReLU activation; during training the loss never decreased, and the accuracy on the test set was ridiculously poor. For a while I even suspected I had misunderstood the PyTorch documentation, which made me doubt everything.

Keywords: PyTorch CNN VGG
