The fourth assignment: CNN actual combat

Use VGG model to fight cat and dog

import numpy as np
import matplotlib.pyplot as plt
import os
import shutil,os
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms,datasets
import time
import json


# Determine whether there is a GPU device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

Download data

! wget http://fenggao-image.stor.sinaapp.com/dogscats.zip
! unzip dogscats.zip

Download the test set from the official website of the competition

! wget https://static.leiphone.com/cat_dog.rar
! unrar x cat_dog.rar
shutil.move("./cat_dog/test","./dogscats")

data processing

datasets is a package in torchvision, which can be used to load image data. It can read data from the hard disk in the form of multi thread and transmit it to GPU in the form of mini batch in network training. When using CNN to process images, preprocessing is required. The pictures will be sorted into 224 × 224 × 3 224\times 224 \times 3 two hundred and twenty-four × two hundred and twenty-four × 3, and will be normalized at the same time.

Torchvision supports some complex preprocessing / transformation of input data (normalization, clipping, flipping, jittering, etc.). For details, please refer to the official documentation of torchvision.tranforms.

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

vgg_format = transforms.Compose([
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])

data_dir = './dogscats'

dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), vgg_format)
         for x in ['train', 'valid','test']}

dset_sizes = {x: len(dsets[x]) for x in ['train', 'valid','test']}
dset_classes = dsets['train'].classes

# You can view some properties of dsets through the following code

print(dsets['train'].classes)
print(dsets['train'].class_to_idx)
print(dsets['train'].imgs[:5])
print('dset_sizes: ', dset_sizes)

loader_train = torch.utils.data.DataLoader(dsets['train'], batch_size=64, shuffle=True, num_workers=6)
loader_valid = torch.utils.data.DataLoader(dsets['valid'], batch_size=5, shuffle=False, num_workers=6)
loader_test = torch.utils.data.DataLoader(dsets['test'], batch_size=5, shuffle=False, num_workers=6)

'''
valid There are a total of 2000 figures, each batch There are 5, so the following traversal will output a total of 400
 At the same time, put the first one batch Save to inputs_try, labels_try，View separately
'''
count = 1
for data in loader_valid:
    print(count, end='\n')
    if count == 1:
        inputs_try,labels_try = data
    count +=1

print(labels_try)
print(inputs_try.shape)

# Display picture applet

def imshow(inp, title=None):
#   Imshow for Tensor.
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = np.clip(std * inp + mean, 0,1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Show labels_ Five pictures of try, that is, the five pictures of the first batch in valid
out = torchvision.utils.make_grid(inputs_try)
imshow(out, title=[dset_classes[x] for x in labels_try])

Create VGG Model

torchvision integrates many general CNN models pre trained on ImageNet (1.2 million training data), which can be downloaded and used directly.

In this course, we directly use the pre trained VGG model. At the same time, in order to show the prediction results of VGG model on this data, the JSON files of 1000 classes of Imagenet are also downloaded.

In this part of the code, the five input pictures are predicted by VGG model, and the results are processed by softmax. Then the recognition results are displayed. It can be seen that the recognition result is very accurate.

!wget https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json

model_vgg = models.vgg16(pretrained=True)

with open('./imagenet_class_index.json') as f:
    class_dict = json.load(f)
dic_imagenet = [class_dict[str(i)][1] for i in range(len(class_dict))]

inputs_try , labels_try = inputs_try.to(device), labels_try.to(device)
model_vgg = model_vgg.to(device)

outputs_try = model_vgg(inputs_try)

print(outputs_try)
print(outputs_try.shape)

'''
You can see that the result is 5 rows and 1000 columns of data, and each column represents the result of each target recognition.
But I can also observe that the results are very wonderful. There are negative numbers and positive numbers,
In order to VGG The output of the network is transformed into the prediction probability of each class, and we input the results into Softmax function
'''
m_softm = nn.Softmax(dim=1)
probs = m_softm(outputs_try)
vals_try,pred_try = torch.max(probs,dim=1)

print( 'prob sum: ', torch.sum(probs,1))
print( 'vals_try: ', vals_try)
print( 'pred_try: ', pred_try)

print([dic_imagenet[i] for i in pred_try.data])
imshow(torchvision.utils.make_grid(inputs_try.data.cpu()), 
       title=[dset_classes[x] for x in labels_try.data.cpu()])

Modify the last layer and freeze the parameters of the previous layer

The VGG model is shown in the figure below. Note that the network consists of three elements:

Convolution layer (CONV) is a pattern that finds parts in an image
Full connection layer (FC) is to establish the association of features globally
Pool is the invariance to reduce the dimension of the image to improve the feature

Our goal is to use the pre trained model. Therefore, we need to replace the last nn.Linear layer with class 2 from class 1000. In order to freeze the parameters of the previous layer during training, you need to set required_grad=False. In this way, when back propagating the training gradient, the weight of the previous layer will not be updated automatically. During training, only the parameters of the last layer will be updated.

print(model_vgg)

model_vgg_new = model_vgg;

for param in model_vgg_new.parameters():
    param.requires_grad = False
model_vgg_new.classifier._modules['6'] = nn.Linear(4096, 2)
model_vgg_new.classifier._modules['7'] = torch.nn.LogSoftmax(dim = 1)

model_vgg_new = model_vgg_new.to(device)

print(model_vgg_new.classifier)

Train and test the full connection layer

It includes three steps: Step 1, create loss function and optimizer; Step 2, training model; Step 3, test the model.

'''
Step 1: create loss function and optimizer

loss function  NLLLoss() The input is a logarithmic probability vector and a target label. 
It doesn't calculate the logarithmic probability for us. The last layer is log_softmax()Network. 
'''
criterion = nn.NLLLoss()

# Learning rate
lr = 0.001

# Random gradient descent
optimizer_vgg = torch.optim.SGD(model_vgg_new.classifier[6].parameters(),lr = lr)

'''
Step 2: Training Model
'''

def train_model(model,dataloader,size,epochs=1,optimizer=None):
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        running_corrects = 0
        count = 0
        for inputs,classes in dataloader:
            inputs = inputs.to(device)
            classes = classes.to(device)
            outputs = model(inputs)
            loss = criterion(outputs,classes)           
            optimizer = optimizer
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _,preds = torch.max(outputs.data,1)
            # statistics
            running_loss += loss.data.item()
            running_corrects += torch.sum(preds == classes.data)
            count += len(inputs)
            print('Training: No. ', count, ' process ... total: ', size)
        epoch_loss = running_loss / size
        epoch_acc = running_corrects.data.item() / size
        print('Loss: {:.4f} Acc: {:.4f}'.format(
                     epoch_loss, epoch_acc))
        
        
# model training
train_model(model_vgg_new,loader_train,size=dset_sizes['train'], epochs=1, 
            optimizer=optimizer_vgg)

def test_model(model,dataloader,size):
    model.eval()
    predictions = np.zeros(size)
    all_classes = np.zeros(size)
    all_proba = np.zeros((size,2))
    i = 0
    running_loss = 0.0
    running_corrects = 0
    for inputs,classes in dataloader:
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model(inputs)
        loss = criterion(outputs,classes)           
        _,preds = torch.max(outputs.data,1)
        # statistics
        running_loss += loss.data.item()
        running_corrects += torch.sum(preds == classes.data)
        predictions[i:i+len(classes)] = preds.to('cpu').numpy()
        all_classes[i:i+len(classes)] = classes.to('cpu').numpy()
        all_proba[i:i+len(classes),:] = outputs.data.to('cpu').numpy()
        i += len(classes)
        print('Testing: No. ', i, ' process ... total: ', size)        
    epoch_loss = running_loss / size
    epoch_acc = running_corrects.data.item() / size
    print('Loss: {:.4f} Acc: {:.4f}'.format(
                     epoch_loss, epoch_acc))
    return predictions, all_proba, all_classes
  
predictions, all_proba, all_classes = test_model(model_vgg_new,loader_valid,size=dset_sizes['valid'])

Prediction of test data set

predictions, all_proba, all_classes = test_model(model_vgg_new,loader_test,size=dset_sizes['test'])

The results are exported in cats_ vs_ In dogs.csv

import csv
with open('./dogscats/cats_vs_dogs.csv','w',newline="")as f:
  writer = csv.writer(f)
  for index,cls in enumerate(predictions):
    path = datasets.ImageFolder(os.path.join(data_dir,'test'),vgg_format).imgs[index][0]
    l = path.split("/")
    img_name = l[-1]
    order = int(img_name.split(".")[0])
    writer.writerow([order,int(predictions[index])])

Submit results

Submit the result after sorting the csv file

Keywords: Pytorch Deep Learning Software Engineering CNN

Added by TonyIOD on Sat, 23 Oct 2021 10:17:43 +0300

Programming VIP