VGG
VGG is a classical convolutional neural network, built mainly from convolution layers, normalization layers, activation functions, pooling layers, and fully connected layers. The convolution > normalization > activation sequence forms the main body of the network; the pooling layers downsample the feature maps, and the fully connected layers produce the final output used for classification.
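As a minimal sketch of one such conv > BN > ReLU > pool unit (the 3 -> 64 channel counts are chosen only for illustration, not taken from the model below):

import torch
import torch.nn as nn

# One VGG-style unit: convolution, then batch normalization, then ReLU,
# followed by a max-pooling layer that halves the spatial resolution.
block = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
)

x = torch.randn(1, 3, 256, 256)   # dummy image batch
print(block(x).shape)             # torch.Size([1, 64, 128, 128])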
import torch
import torch.nn as nn

# Number of output channels for each convolution block; 'M' marks a max-pooling layer.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        # A 256x256 input passes through five 2x2 max-pool layers, leaving 512 x 8 x 8 features.
        self.fc1 = nn.Linear(512 * 8 * 8, 512 * 4)
        self.bn_d1 = self._linear_layers()
        self.fc2 = nn.Linear(512 * 4, 128)
        self.bn_d2 = self._linear_layers()
        self.fc3 = nn.Linear(128, 2)  # two output classes

    @staticmethod
    def _linear_layers():
        # Dropout + ReLU applied after each fully connected layer.
        layers = [torch.nn.Dropout(0.5), torch.nn.ReLU()]
        return nn.Sequential(*layers)

    @staticmethod
    def _make_layers(structure):
        layers = []
        in_channels = 3
        for out_channels in structure:
            if out_channels == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
                           nn.BatchNorm2d(out_channels),
                           nn.ReLU(inplace=True)]
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)  # flatten the feature maps
        out = self.fc1(out)
        out = self.bn_d1(out)
        out = self.fc2(out)
        out = self.bn_d2(out)
        out = self.fc3(out)
        return out
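A quick sanity check of the model (a sketch only; the shapes follow from the 256x256 input size used for training below):

import torch

model = VGG('VGG19')
dummy = torch.randn(2, 3, 256, 256)   # batch of two 256x256 images
out = model(dummy)
print(out.shape)                      # torch.Size([2, 2]): one score per class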
Training
Here the training images are read with OpenCV. OpenCV and PIL decode images differently (OpenCV returns pixel data in BGR channel order, while PIL uses RGB), so the same file yields very different pixel values depending on the library. Therefore, whichever library was used to read images during training must also be used when running the model at inference time.
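A small sketch of that difference (the image path is just a placeholder):

import cv2
import numpy as np
from PIL import Image

path = 'example.jpg'                              # placeholder path
bgr = cv2.imread(path)                            # OpenCV: H x W x 3 array in BGR order
rgb = np.array(Image.open(path).convert('RGB'))   # PIL: the same image in RGB order

print(np.array_equal(bgr, rgb))                   # usually False
print(np.array_equal(bgr[..., ::-1], rgb))        # typically True once the channels are reversed,
                                                  # though decoder differences can still cause small deviations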
import torch
import cv2
import torch.nn as nn
import torch.utils.data
from PIL import Image
import torchvision
import torchvision.transforms as transforms

from model.VggNet import VGG

# Hyperparameters
EPOCH = 30
BATCH_SIZE = 10
LR = 0.001

IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')


def default_loader(path):
    # Read the image with OpenCV (BGR order) and wrap it as a PIL image;
    # the same loader must also be used at inference time.
    img = Image.fromarray(cv2.imread(path))
    return img


def load_train_data():
    path = "../medias/fire_pictures/val"  # path to the training images
    train_set = torchvision.datasets.ImageFolder(
        path,
        transform=transforms.Compose([transforms.Resize((256, 256)),
                                      transforms.ToTensor()]),
        loader=default_loader)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
    return train_loader


def load_test_data():
    path = "../medias/fire_pictures/test"
    test_set = torchvision.datasets.ImageFolder(
        path,
        transform=transforms.Compose([transforms.Resize((256, 256)),
                                      transforms.ToTensor()]),
        loader=default_loader)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)
    return test_loader


def train(filename, model):
    train_loader = load_train_data()
    if torch.cuda.is_available():
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):
            if torch.cuda.is_available():
                b_x = b_x.cuda()
                b_y = b_y.cuda()
            output = model(b_x)
            loss = loss_func(output, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 50 == 0:
                print('Epoch: ', epoch)
    cnn = model.cpu()
    torch.save(cnn, '../network/' + filename)


def test(filename):
    cnn = torch.load('../network/' + filename).cuda()
    test_loader = load_test_data()
    images, labels = next(iter(test_loader))
    outputs = cnn(images.cuda())
    predicted = torch.softmax(outputs, 1)
    p = torch.max(predicted, 1)
    print(p[1].cpu().numpy(), labels.cpu().numpy())
    # Count correct predictions in this batch and report the accuracy.
    j = 0
    for i in range(len(p[1].cpu().numpy())):
        if int(p[1].cpu().numpy()[i]) == int(labels.cpu().numpy()[i]):
            j += 1
    print(j / len(labels))


if __name__ == '__main__':
    vgg = VGG("VGG19")
    train("vgg19_fire_256.pkl", vgg)
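For inference on a single image, the saved network can be loaded and fed a tensor that went through the same OpenCV-based loading and resizing as during training. This is a sketch; the image path is a placeholder, and the class index order follows whatever folder names ImageFolder saw during training:

import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image

net = torch.load('../network/vgg19_fire_256.pkl').eval()

# Read with OpenCV exactly as in training, then apply the same transforms.
img = Image.fromarray(cv2.imread('example.jpg'))   # placeholder path
tensor = transforms.Compose([transforms.Resize((256, 256)),
                             transforms.ToTensor()])(img).unsqueeze(0)

with torch.no_grad():
    probs = torch.softmax(net(tensor), 1)
print(probs)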