# LeNet-5 ```import tensorflow as tf
help(tf.losses.categorical_crossentropy)
# View the default parameters and the usage notes of the categorical_crossentropy function.
# The parameter from_logits is False by default, which means the network prediction y_pred must already be the output of the Softmax function.
# When from_logits is set to True, the network prediction y_pred must be the variable z (the logits) that has not passed through the Softmax function.
# Setting from_logits=True implements the softmax activation inside the loss function, so there is no need to add softmax to the network manually; computing softmax inside the loss function also improves the stability of the numerical calculation.

Help on function categorical_crossentropy in module tensorflow.python.keras.losses:
categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0)
Computes the categorical crossentropy loss.
Args:
y_true: tensor of true targets.
y_pred: tensor of predicted targets.
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
we assume that `y_pred` encodes a probability distribution.
``` # VGG # CIFAR10 vs. VGG13  ```def preprocess(x, y):
# Standardization: mapping data between - 1 and 1
x = 2*tf.cast(x, dtype=tf.float32) / 255.-1
y = tf.cast(y, dtype=tf.int32)
return x,y

# Load CIFAR10; the rest of this snippet needs x, y, x_test and y_test to exist.
(x, y), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Drop the trailing singleton label dimension: [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
# Print training and test set shapes
print(x.shape, y.shape, x_test.shape, y_test.shape)
# Build the training dataset: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)
# Build the test dataset: preprocess and batch (no shuffling)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(128)
# Sample one batch from the training set and inspect it
sample = next(iter(train_db))
# `sample` is an (images, labels) tuple, so index it before reading shapes and value ranges
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

# Create a list with multiple layers first
conv_layers = [
    # Conv-Conv-Pooling unit 1
    # 64 3x3 convolution kernels; "same" padding keeps input and output the same size
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # Halve the height and width
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),

    # Conv-Conv-Pooling unit 2: output channels increased to 128
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # Halve the height and width
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),

    # Conv-Conv-Pooling unit 3: output channels increased to 256
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # Halve the height and width
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),

    # Conv-Conv-Pooling unit 4: output channels increased to 512
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # Halve the height and width
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),

    # Conv-Conv-Pooling unit 5: output channels stay at 512
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # Halve the height and width: a 32x32 input ends up as 1x1
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
]

# Wrap the layer list created earlier in a network container
conv_net = Sequential(conv_layers)

# Create the 3-layer fully connected sub-network
fc_net = Sequential([
layers.Dense(256, activation=tf.nn.relu),
layers.Dense(128, activation=tf.nn.relu),
# NOTE(review): 100 output units, although this section's heading says CIFAR10 - confirm the intended class count
layers.Dense(100, activation=None),
])

# Build the 2 sub-networks and print their parameter summaries
conv_net.build(input_shape=[4, 32, 32, 3])
fc_net.build(input_shape=[4, 512])
conv_net.summary()
fc_net.summary()

# List concatenation: merge the trainable parameters of the 2 sub-networks
variables = conv_net.trainable_variables + fc_net.trainable_variables
# Auto update
```
```import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(2345)

# 5 units of conv + max pooling
conv_layers = [
    # unit 1: two 3x3 convolutions (64 filters), then 2x2 max pooling halves height/width
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 2: 128 filters
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 3: 256 filters
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 4: 512 filters
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 5: 512 filters; a 32x32 input is reduced to 1x1 here
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
]

def preprocess(x, y):
    """Map image pixels from [0, 255] to [-1, 1] and cast labels to int32."""
    doubled = 2 * tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)
    return doubled / 255. - 1, labels

# Load CIFAR10 (this script classifies into 10 classes); x, y, x_test, y_test are used below.
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
# Drop the trailing singleton label dimension: [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)
# Training set: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)
# Test set: preprocess and batch only
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(64)
sample = next(iter(train_db))
# `sample` is an (images, labels) tuple, so index it before reading shapes and value ranges
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

def main():
    """Train the conv + fully connected network on `train_db` and print test accuracy each epoch.

    Reads the module-level `conv_layers`, `train_db` and `test_db`.
    """
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)
    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(10, activation=None),
    ])
    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    conv_net.summary()
    fc_net.summary()

    # NOTE(review): the optimizer was missing from this snippet; 1e-4 is an assumed learning rate.
    optimizer = optimizers.Adam(learning_rate=1e-4)
    # [1, 2] + [3, 4] => [1, 2, 3, 4]: one flat list of all trainable parameters
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(50):
        for step, (x, y) in enumerate(train_db):
            # Record the forward pass so gradients can be computed afterwards
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 10]
                logits = fc_net(out)
                # [b] => [b, 10]
                y_onehot = tf.one_hot(y, depth=10)
                # compute loss; softmax is applied inside the loss (from_logits=True)
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Without this update step the network parameters never change
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))
            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))

        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)  # Convert output to probability
            pred = tf.argmax(prob, axis=1)  # Index of the largest probability is the predicted class
            pred = tf.cast(pred, dtype=tf.int32)
            # 1 where the predicted class equals the true label, 0 otherwise
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            # x.shape is a TensorShape; the batch size is its first entry
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()
```

# Deep residual network ResNet, DenseNet     ```import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras

tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Load Fashion-MNIST and scale the pixel values by 1/255
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32)/255., x_test.astype(np.float32)/255.
# Add a trailing channel axis: [b, 28, 28] => [b, 28, 28, 1]
x_train, x_test = np.expand_dims(x_train, axis=3), np.expand_dims(x_test, axis=3)
# One-hot encode the labels, then convert back to NumPy because a mix of NumPy arrays
# and tensors cannot be used as input to Keras
y_train_ohe = tf.one_hot(y_train, depth=10).numpy()
y_test_ohe = tf.one_hot(y_test, depth=10).numpy()
print(x_train.shape, y_train.shape) #(60000, 28, 28, 1) (60000,)
print(x_test.shape, y_test.shape) #(10000, 28, 28, 1) (10000,)

# 3x3 convolution
def conv3x3(channels, stride=1, kernel=(3, 3)):
    """Create a bias-free, 'same'-padded Conv2D layer (3x3 kernel unless `kernel` is given)."""
    weight_init = tf.random_normal_initializer()
    return keras.layers.Conv2D(
        channels,
        kernel,
        strides=stride,
        padding='same',
        use_bias=False,
        kernel_initializer=weight_init,
    )

class ResnetBlock(keras.Model):
    """Residual block computing (BN -> ReLU -> conv) twice and adding a shortcut.

    When `residual_path` is True the shortcut is also passed through
    BN -> ReLU -> 1x1 convolution built with the same channels and strides as
    the first convolution; otherwise the input is added unchanged.
    """

    def __init__(self, channels, strides=1, residual_path=False):
        super().__init__()
        self.channels = channels
        self.strides = strides
        self.residual_path = residual_path

        # Main path: two convolutions, each preceded by batch normalization
        self.conv1 = conv3x3(channels, strides)
        self.bn1 = keras.layers.BatchNormalization()
        self.conv2 = conv3x3(channels)
        self.bn2 = keras.layers.BatchNormalization()

        if residual_path:
            # Projection for the shortcut: 1x1 convolution with the block's stride
            self.down_conv = conv3x3(channels, strides, kernel=(1, 1))
            self.down_bn = tf.keras.layers.BatchNormalization()

    def call(self, inputs, training=None):
        # Main path
        out = tf.nn.relu(self.bn1(inputs, training=training))
        out = self.conv1(out)
        out = tf.nn.relu(self.bn2(out, training=training))
        out = self.conv2(out)

        # Shortcut path: projected when requested, otherwise the raw input
        if self.residual_path:
            shortcut = tf.nn.relu(self.down_bn(inputs, training=training))
            shortcut = self.down_conv(shortcut)
        else:
            shortcut = inputs

        return out + shortcut

class ResNet(keras.Model):
    """ResNet assembled from groups of ResnetBlock layers.

    Args:
        block_list: number of ResnetBlocks in each group, e.g. [2, 2, 2].
        num_classes: number of units in the final Dense layer (logits).
        initial_filters: channel count of the first convolution and the first
            group; it doubles for every following group.
        **kwargs: forwarded to keras.Model.
    """

    def __init__(self, block_list, num_classes, initial_filters=16, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.num_blocks = len(block_list)
        self.block_list = block_list
        self.in_channels = initial_filters
        self.out_channels = initial_filters
        self.conv_initial = conv3x3(self.out_channels)
        self.blocks = keras.models.Sequential(name='dynamic-blocks')

        # Build every group, and every ResnetBlock inside each group
        for block_id, num_layers in enumerate(block_list):
            for layer_id in range(num_layers):
                if block_id != 0 and layer_id == 0:
                    # First layer of every group except the first: the channel count has
                    # just doubled, so use strides=2 to halve height/width and project
                    # the shortcut (residual_path=True) so both paths can be added.
                    block = ResnetBlock(self.out_channels, strides=2, residual_path=True)
                else:
                    # Same channel count as the previous layer: the shortcut is only
                    # projected if the channel counts differ.
                    residual_path = self.in_channels != self.out_channels
                    block = ResnetBlock(self.out_channels, residual_path=residual_path)
                self.in_channels = self.out_channels
                # Register the block; without this the Sequential container stays empty
                self.blocks.add(block)
            # Every new group doubles the number of output channels
            self.out_channels *= 2

        self.final_bn = keras.layers.BatchNormalization()
        self.avg_pool = keras.layers.GlobalAveragePooling2D()
        self.fc = keras.layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return class logits for a batch of images."""
        out = self.conv_initial(inputs)
        out = self.blocks(out, training=training)
        out = self.final_bn(out, training=training)
        out = tf.nn.relu(out)
        out = self.avg_pool(out)
        out = self.fc(out)
        return out

def main():
    """Build, train and evaluate the ResNet on the module-level Fashion-MNIST arrays."""
    num_classes = 10  # Number of classes
    batch_size = 32
    epochs = 1
    # Build the model: 3 groups with 2 ResnetBlocks each (6 blocks in total)
    model = ResNet([2, 2, 2], num_classes)
    # The model outputs raw logits, hence from_logits=True in the loss.
    # NOTE(review): the compile call's opening line was missing from this snippet; Adam with
    # learning rate 0.001 is an assumed choice.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  loss=keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.build(input_shape=(None, 28, 28, 1))
    print("Number of variables in the model :", len(model.variables))
    model.summary()
    # train
    model.fit(x_train, y_train_ohe, batch_size=batch_size, epochs=epochs,
              validation_data=(x_test, y_test_ohe), verbose=1)
    # evaluate on the test set; evaluate returns the loss and the accuracy metric
    scores = model.evaluate(x_test, y_test_ohe, batch_size, verbose=1)
    print("Final test loss and accuracy :", scores)


if __name__ == '__main__':
    main()
```    ```import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import layers, Sequential

class BasicBlock(layers.Layer):
    """Residual block: two 3x3 conv + batch-norm units plus a shortcut connection.

    Args:
        filter_num: number of output channels of both convolutions.
        stride: stride of the first convolution. When it is not 1 the shortcut is
            passed through a 1x1 convolution with the same stride and channel count
            so that it can be added to the main path.
    """

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()

        if stride != 1:
            # An empty Sequential would return the input unchanged and the addition
            # below would fail on mismatched shapes, so add the 1x1 projection.
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            # NOTE(review): assumes the input already has filter_num channels - confirm
            self.downsample = lambda x: x

    def call(self, inputs, training=None):
        """Return relu(f(inputs) + shortcut(inputs))."""
        # [b, h, w, c]
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        identity = self.downsample(inputs)
        # Add the two paths element-wise before the final activation
        output = out + identity
        output = tf.nn.relu(output)
        return output

class ResNet(keras.Model):
    """Generic ResNet made of a stem, four stages of BasicBlocks, pooling and a Dense head.

    Args:
        layer_dims: number of BasicBlocks in each of the four stages, e.g. [2, 2, 2, 2].
        num_classes: number of units in the final Dense layer (logits).
    """

    def __init__(self, layer_dims, num_classes=100):  # [2, 2, 2, 2]
        super(ResNet, self).__init__()
        self.stem = Sequential([layers.Conv2D(64, (3, 3), strides=(1, 1)),
                                layers.BatchNormalization(),
                                layers.Activation('relu'),
                                ])
        # Each stage takes its own entry of layer_dims as its block count
        self.layer1 = self.build_resblock(64, layer_dims[0])
        self.layer2 = self.build_resblock(128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layer_dims[3], stride=2)
        # output: [b, h, w, 512]
        self.avgpool = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return class logits for a batch of images."""
        x = self.stem(inputs, training=training)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        # [b, c]
        x = self.avgpool(x)
        # [b, num_classes]
        x = self.fc(x)
        return x

    def build_resblock(self, filter_num, blocks, stride=1):
        """Stack `blocks` BasicBlocks with `filter_num` channels into one Sequential."""
        res_blocks = Sequential()
        # Only the first block may down sample (stride != 1)
        res_blocks.add(BasicBlock(filter_num, stride))
        # The remaining blocks always use stride 1
        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(filter_num, stride=1))
        return res_blocks

def resnet18():
    """Build a ResNet configured with layer_dims [2, 2, 2, 2]."""
    layer_dims = [2, 2, 2, 2]
    return ResNet(layer_dims)

def resnet34():
    """Build a ResNet configured with layer_dims [3, 4, 6, 3]."""
    layer_dims = [3, 4, 6, 3]
    return ResNet(layer_dims)
```
```import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
from    resnet import resnet18
tf.random.set_seed(2345)

def preprocess(x, y):
    """Map image pixels from [0, 255] to [-0.5, 0.5] and cast labels to int32."""
    pixels = tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)
    return pixels / 255. - 0.5, labels

# Load CIFAR100 (this script trains with 100 classes); x, y, x_test, y_test are used below.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# Drop the trailing singleton label dimension: [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)
# Training set: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(512)
# Test set: preprocess and batch only
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(512)
sample = next(iter(train_db))
# `sample` is an (images, labels) tuple, so index it before reading shapes and value ranges
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

def main():
    """Train resnet18 on `train_db` (100 classes) and print test accuracy after every epoch."""
    # [b, 32, 32, 3] => [b, 100]
    model = resnet18()
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()
    # NOTE(review): the optimizer was missing from this snippet; 1e-4 is an assumed learning rate.
    optimizer = optimizers.Adam(learning_rate=1e-4)

    for epoch in range(500):
        for step, (x, y) in enumerate(train_db):
            # Record the forward pass so gradients can be computed afterwards
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 100]
                logits = model(x, training=True)
                # [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)
                # compute loss; softmax is applied inside the loss (from_logits=True)
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Without this update step the network parameters never change
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if step % 50 == 0:
                print(epoch, step, 'loss:', float(loss))

        total_num = 0
        total_correct = 0
        for x, y in test_db:
            logits = model(x, training=False)
            prob = tf.nn.softmax(logits, axis=1)  # Convert output to probability
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            # x.shape is a TensorShape; the batch size is its first entry
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()
```

# CIFAR10 and ResNet18   ```class BasicBlock(layers.Layer):
# Residual module class
def __init__(self, filter_num, stride=1):
super(BasicBlock, self).__init__()
# f(x) contains two ordinary convolution layers, creating convolution layer 1
self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
self.bn1 = layers.BatchNormalization()
self.relu = layers.Activation('relu')
# Create convolution layer 2
self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
self.bn2 = layers.BatchNormalization()
# Insert identity layer
if stride != 1:
self.downsample = Sequential()
else:
# Otherwise, connect directly
self.downsample = lambda x:x

def call(self, inputs, training=None):
# Forward propagation function
out = self.conv1(inputs) # Through the first convolution layer
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out) # Through the second convolution layer
out = self.bn2(out)
# Input is converted by identity()
identity = self.downsample(inputs)
# f(x)+x operation
# By activating the function and returning
output = tf.nn.relu(output)
return output
```
```import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import layers, Sequential

class BasicBlock(layers.Layer):
    """Residual module: two 3x3 conv + batch-norm units plus a shortcut connection.

    Args:
        filter_num: number of output channels of both convolutions.
        stride: stride of the first convolution. When it is not 1 the shortcut is
            passed through a 1x1 convolution with the same stride and channel count
            so that it can be added to the main path.
    """

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        # First convolution unit
        self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        # Second convolution unit
        self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()
        if stride != 1:
            # Shape matching by 1x1 convolution; an empty Sequential would leave
            # the shortcut with the wrong shape.
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            # Shapes already match: connect the input directly
            # NOTE(review): assumes the input already has filter_num channels - confirm
            self.downsample = lambda x: x

    def call(self, inputs, training=None):
        """Return relu(f(inputs) + shortcut(inputs))."""
        # [b, h, w, c], through the first convolution unit
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        # Through the second convolution unit
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        # Through the identity module
        identity = self.downsample(inputs)
        # Add the outputs of the 2 paths directly
        output = out + identity
        output = tf.nn.relu(output)  # Activation function
        return output

class ResNet(keras.Model):
    """Generic ResNet made of a stem, four stages of BasicBlocks, pooling and a Dense head.

    Args:
        layer_dims: number of BasicBlocks in each of the four stages, e.g. [2, 2, 2, 2].
        num_classes: number of units in the final Dense layer (logits).
    """

    def __init__(self, layer_dims, num_classes=10):  # [2, 2, 2, 2]
        super(ResNet, self).__init__()
        # Root network, preprocessing
        self.stem = Sequential([layers.Conv2D(64, (3, 3), strides=(1, 1)),
                                layers.BatchNormalization(),
                                layers.Activation('relu'),
                                ])
        # Stack 4 stages; each stage holds several BasicBlocks and takes its own
        # entry of layer_dims as its block count. Stages 2-4 use stride 2.
        self.layer1 = self.build_resblock(64, layer_dims[0])
        self.layer2 = self.build_resblock(128, layer_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layer_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layer_dims[3], stride=2)
        # Reduce the height and width to 1x1 through the pooling layer
        self.avgpool = layers.GlobalAveragePooling2D()
        # Finally, a fully connected layer for classification
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return class logits for a batch of images."""
        # Through the root network
        x = self.stem(inputs, training=training)
        # Through the 4 stages in turn
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        # Through the pooling layer
        x = self.avgpool(x)
        # Through the fully connected layer
        x = self.fc(x)
        return x

    def build_resblock(self, filter_num, blocks, stride=1):
        """Auxiliary function: stack `blocks` BasicBlocks with `filter_num` channels."""
        res_blocks = Sequential()
        # Only the first BasicBlock may have a stride other than 1 (down sampling)
        res_blocks.add(BasicBlock(filter_num, stride))
        for _ in range(1, blocks):
            # The other BasicBlocks all use stride 1
            res_blocks.add(BasicBlock(filter_num, stride=1))
        return res_blocks

def resnet18():
    """Build a ResNet configured with layer_dims [2, 2, 2, 2]."""
    # Different ResNets are obtained by changing the number of BasicBlocks per stage
    layer_dims = [2, 2, 2, 2]
    return ResNet(layer_dims)

def resnet34():
    """Build a ResNet configured with layer_dims [3, 4, 6, 3]."""
    # Different ResNets are obtained by changing the number of BasicBlocks per stage
    layer_dims = [3, 4, 6, 3]
    return ResNet(layer_dims)
```
```import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
import  os
from    resnet import resnet18
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(2345)

def preprocess(x, y):
    """Map image pixels from [0, 255] to [-1, 1] and cast labels to int32."""
    doubled = 2 * tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)  # Type conversion
    return doubled / 255. - 1, labels

# Load CIFAR10 (this script trains with 10 classes); x, y, x_test, y_test are used below.
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
y = tf.squeeze(y, axis=1)  # Delete the unnecessary dimension: [b, 1] => [b]
y_test = tf.squeeze(y_test, axis=1)  # Delete the unnecessary dimension
print(x.shape, y.shape, x_test.shape, y_test.shape)
train_db = tf.data.Dataset.from_tensor_slices((x, y))  # Build the training set
# Shuffle, preprocess, batch
train_db = train_db.shuffle(1000).map(preprocess).batch(512)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))  # Build the test set
# Preprocess and batch (the test set is not shuffled)
test_db = test_db.map(preprocess).batch(512)
# Take one sample batch
sample = next(iter(train_db))
# `sample` is an (images, labels) tuple, so index it before reading shapes and value ranges
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

def main():
    """Train resnet18 on `train_db` (10 classes) and print test accuracy after every epoch."""
    # [b, 32, 32, 3] => [b, 10]
    model = resnet18()  # ResNet18 network
    model.build(input_shape=(None, 32, 32, 3))
    model.summary()  # Print the network parameter statistics
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    optimizer = optimizers.Adam(learning_rate=1e-4)  # Build optimizer

    for epoch in range(100):  # Training epochs
        for step, (x, y) in enumerate(train_db):
            # Record the forward pass so gradients can be computed afterwards
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 10], forward propagation in training mode
                logits = model(x, training=True)
                # [b] => [b, 10], one-hot encoding
                y_onehot = tf.one_hot(y, depth=10)
                # Calculate cross entropy; softmax is applied inside the loss
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Update network parameters
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 50 == 0:
                print(epoch, step, 'loss:', float(loss))

        total_num = 0
        total_correct = 0
        for x, y in test_db:
            # Inference mode for evaluation
            logits = model(x, training=False)
            prob = tf.nn.softmax(logits, axis=1)  # Convert output to probability
            pred = tf.argmax(prob, axis=1)  # Index of the largest probability is the predicted class
            pred = tf.cast(pred, dtype=tf.int32)
            # 1 where the predicted class equals the true label, 0 otherwise
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            # x.shape is a TensorShape; the batch size is its first entry
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()
``` ```import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
tf.random.set_seed(2345)

# 5 units of conv + max pooling
conv_layers = [
    # unit 1: two 3x3 convolutions (64 filters), then 2x2 max pooling halves height/width
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 2: 128 filters
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 3: 256 filters
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 4: 512 filters
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
    # unit 5: 512 filters; a 32x32 input is reduced to 1x1 here
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding="same"),
]

def preprocess(x, y):
    """Map image pixels from [0, 255] to [0, 1] and cast labels to int32."""
    pixels = tf.cast(x, dtype=tf.float32)
    labels = tf.cast(y, dtype=tf.int32)
    return pixels / 255., labels

# Load CIFAR100 (this script classifies into 100 classes); x, y, x_test, y_test are used below.
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# Drop the trailing singleton label dimension: [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)
# Training set: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)
# Test set: preprocess and batch only
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(64)
sample = next(iter(train_db))
# `sample` is an (images, labels) tuple, so index it before reading shapes and value ranges
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

def main():
    """Train the conv + fully connected network (100 classes) and print test accuracy each epoch.

    Reads the module-level `conv_layers`, `train_db` and `test_db`.
    """
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)
    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(100, activation=None),
    ])
    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    # NOTE(review): the optimizer was missing from this snippet; 1e-4 is an assumed learning rate.
    optimizer = optimizers.Adam(learning_rate=1e-4)
    # [1, 2] + [3, 4] => [1, 2, 3, 4]: one flat list of all trainable parameters
    variables = conv_net.trainable_variables + fc_net.trainable_variables

    for epoch in range(50):
        for step, (x, y) in enumerate(train_db):
            # Record the forward pass so gradients can be computed afterwards
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 100]
                logits = fc_net(out)
                # [b] => [b, 100]
                y_onehot = tf.one_hot(y, depth=100)
                # compute loss; softmax is applied inside the loss (from_logits=True)
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Without this update step the network parameters never change
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))

        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)  # softmax converts the output value to a probability value
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            # x.shape is a TensorShape; the batch size is its first entry
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()
```    # Dilated (atrous) convolution  ```# Simulated input
x = tf.random.normal([1,7,7,1])
# Set the dilation rate parameter to 2 for a single 3 x 3 convolution kernel.
layer = layers.Conv2D(1,kernel_size=3,strides=1,dilation_rate=2)
out = layer(x) # Forward computation
out.shape
```

# Transposed convolution (conv2d_transpose)  ```Principle of transposed convolution, case 1: o+2p-k is a multiple of s
1. o+2p-k is a multiple of s: o is the output width/height of the transposed convolution layer, p is the padding of the transposed convolution kernel, k is the width w / height h of the transposed convolution kernel, and s is the stride of the transposed convolution kernel.
2. Example: the input of the transposed convolution is a single-channel feature map whose width w / height h is 2x2, the transposed convolution kernel is 3x3 (i.e. k = 3), the stride s is 2 and the padding p is 0. The transposed convolution is then computed as follows:
1. Step 1: evenly insert s-1 blank data points (s being the stride of the transposed convolution kernel) between the data points of the input single-channel feature map, which turns the 2x2 matrix into a 3x3 matrix.
2. Step 2: pad k-p-1 rows/columns around the 3x3 matrix. Here k-p-1 = 3-0-1 = 2, so 2 rows and 2 columns are added on every side and the matrix grows from 3x3 to 7x7.
3. Step 3: apply an ordinary convolution to the 7x7 matrix. The convolution kernel is 3x3 (i.e. k, the same width w / height h as the transposed convolution kernel),
in this step the stride s of the convolution kernel is always 1 (unlike the stride s of the transposed convolution) and the padding p is 0 (the same as the padding p of the transposed convolution kernel).
According to the ordinary convolution formula o = ⌊(i+2*p-k)/s⌋ + 1 (rounding up ⌈x⌉ gives the smallest integer not less than x; rounding down ⌊x⌋ gives the largest integer not greater than x),
the output size is o = ⌊(7+2*0-3)/1⌋ + 1 = 5.
3. Conclusion: the 5x5 output of the ordinary convolution in step 3 is the output of the transposed convolution, so the input (2x2) and the output (5x5) of the transposed convolution are related as follows:
when o+2p-k is a multiple of s, o = (i-1)*s+k-2*p, where o is the output width/height of the transposed convolution layer, p is the padding of the transposed convolution kernel, k is the width/height of the transposed convolution kernel, s is the stride of the transposed convolution kernel,
and i is the input width/height of the transposed convolution. Checking with the example: 5+2*0-3 = 2 is a multiple of s = 2, and (2-1)*2+3-2*0 = 5.
```  ```#Based on TensorFlow, the above example's transposition convolution operation is realized as follows:
import tensorflow as tf
# Create the input X holding the values 1..25 (reshaped to 5x5 below)
x = tf.range(25)+1
#<tf.Tensor: id=5, shape=(25,), dtype=int32, numpy=
#array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
#       18, 19, 20, 21, 22, 23, 24, 25])>
# Reshape to a tensor with legal dimensions
x = tf.reshape(x,[1,5,5,1])
x.shape #TensorShape([1, 5, 5, 1])
x = tf.cast(x, tf.float32)
# Create a convolution kernel matrix with fixed content. Its final shape is [3, 3, 1, 1], i.e. [k, k, number of input channels, number of output channels]
w = tf.constant([[-1,2,-3.],[4,-5,6],[-7,8,-9]])
w.shape #TensorShape([3, 3])
# Expand to a tensor with legal dimensions
w = tf.expand_dims(w,axis=2)
w.shape #TensorShape([3, 3, 1])
w = tf.expand_dims(w,axis=3)
w.shape #TensorShape([3, 3, 1, 1]), i.e. [k, k, number of input channels, number of output channels]
# The padding argument sets the padding: 'VALID' means no padding (only valid window positions are used);
# 'SAME' pads so that, with stride 1, the output has the same width and height as the input.
# Carry out the ordinary convolution with stride 2 and no padding
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
# Output height and width are 2x2
out
#<tf.Tensor: id=14, shape=(1, 2, 2, 1), dtype=float32, numpy=
#array([[[[ -67.],
#         [ -77.]],
#        [[-117.],
#         [-127.]]]], dtype=float32)>

# Take the 2x2 output ([1, 2, 2, 1]) of the ordinary convolution as the input of the transposed convolution and verify that the transposed convolution output is 5x5 ([1, 5, 5, 1])
# Perform the transposed convolution on the output of the ordinary convolution
xx = tf.nn.conv2d_transpose(out, w, strides=2, padding='VALID', output_shape=[1,5,5,1])
# The output is 5x5 ([1, 5, 5, 1]). Transposed convolution restores the size of the ordinary convolution's input, but its output values are not equal to that input
xx.shape #TensorShape([1, 5, 5, 1])
``` ```#When the input width of the general convolution operation is 5x5 or 6x6, the output is the same 2x2
import tensorflow as tf
# Create a convolution kernel matrix with fixed content. Its final shape is [3, 3, 1, 1], i.e. [k, k, number of input channels, number of output channels]
w = tf.constant([[-1,2,-3.],[4,-5,6],[-7,8,-9]])
# Expand to a tensor with legal dimensions
w = tf.expand_dims(w,axis=2)
w.shape #TensorShape([3, 3, 1])
w = tf.expand_dims(w,axis=3)
w.shape #TensorShape([3, 3, 1, 1]), i.e. [k, k, number of input channels, number of output channels]
x = tf.random.normal([1,6,6,1])
# The padding argument sets the padding: 'VALID' means no padding (only valid window positions are used);
# 'SAME' pads the input; only with strides = 1 does padding='SAME' give an output of the same size as the input.
# The 6x6 input is convolved with stride 2 and no padding
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out.shape #TensorShape([1, 2, 2, 1]) #When the height and width of the input are 5x5 or 6x6, the output is the same 2x2
x = tf.random.normal([1,5,5,1])
# The 5x5 input is convolved with the same settings
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out.shape #TensorShape([1, 2, 2, 1]) #When the height and width of the input are 5x5 or 6x6, the output is the same 2x2
```
```1. Ordinary convolution formula: o = ⌊(i + 2p - k) / s⌋ + 1 (⌈·⌉ rounds up to the smallest integer not less than the value; ⌊·⌋ rounds down to the largest integer not greater than the value).
For example, take a single-channel feature map (the input i of the ordinary convolution) of size 5x5 or 6x6, a 3x3 convolution kernel (k = 3), stride s = 2 (the stride must be greater than 1 for this effect) and padding p = 0: in both cases the output o of the ordinary convolution is 2x2.
When the stride s is greater than 1, the final rounding down causes several different input sizes i to map to the same convolution output size o.
For example, if the input size i is 5, then ⌊(5 + 2*0 - 3) / 2⌋ + 1 = 2; if the input size i is 6, then ⌊(6 + 2*0 - 3) / 2⌋ + 1 = 2.

2. Transposed convolution, case 2: o + 2p - k is not a multiple of s
1. In "o + 2p - k is not a multiple of s", o is the width/height of the output of the transposed convolution layer, p is the padding of the transposed convolution kernel, k is the width/height of the transposed convolution kernel, and s is the stride of the transposed convolution kernel.
2. In ordinary convolution, whenever the stride s is greater than 1, rounding down makes several different input sizes i map to the same convolution output size o.
Conversely, in transposed convolution we naturally want the same input size i to be able to produce different output sizes o.
Therefore a extra rows and a extra columns must be filled in so that the transposed convolution can produce outputs of different sizes, recovering each of the different input sizes i that ordinary convolution mapped to the same output size o.
3. For example, suppose the ordinary convolution takes a single-channel feature map (its input i) of size 5x5 or 6x6, with a 3x3 convolution kernel (k = 3), stride s = 2 (the stride must be greater than 1) and padding p = 0,
so that the output o of the ordinary convolution is 2x2. Now we want a transposed convolution that takes this 2x2 input and produces a chosen output size of either 5x5 or 6x6, where the transposed convolution kernel has padding p = 0, size 3x3 (k = 3),
and stride s = 2. The following two steps are needed to achieve this:
1. Step 1: a = (o + 2p - k) % s. This gives the number a of extra rows and columns that must be filled in so that the transposed convolution can produce outputs of different sizes, recovering the different input sizes i that ordinary convolution mapped to the same output size o.
For example, when the desired transposed convolution output is 5x5 (o = 5): (5 + 2*0 - 3) % 2 = 0, i.e. fill 0 rows and 0 columns to obtain a transposed convolution output size of 5x5.
For example, when the desired transposed convolution output is 6x6 (o = 6): (6 + 2*0 - 3) % 2 = 1, i.e. fill 1 row and 1 column to obtain a transposed convolution output size of 6x6.
2. Step 2: o = (i - 1)*s + k - 2p + a. Using the a rows and columns computed in step 1, this gives the desired transposed convolution output size o.
For example, to make the transposed convolution output 5x5 (o = 5), with a = 0 rows and columns to fill: (2 - 1)*2 + 3 - 2*0 + 0 = 5.
For example, to make the transposed convolution output 6x6 (o = 6), with a = 1 row and column to fill: (2 - 1)*2 + 3 - 2*0 + 1 = 6.
``` ```import tensorflow as tf

# Demo: a 2x2 convolution output can be mapped back to either a 5x5 or a 6x6
# tensor by transposed convolution, selected through output_shape.

# Create a convolution kernel with fixed content, then expand it to the
# 4-D layout [k, k, input channels, output channels].
w = tf.constant([[-1, 2, -3.], [4, -5, 6], [-7, 8, -9]])
w = tf.expand_dims(w, axis=2)
w.shape  # TensorShape([3, 3, 1])
w = tf.expand_dims(w, axis=3)
w.shape  # TensorShape([3, 3, 1, 1])

# padding='VALID' uses no padding (only fully valid window positions).
# padding='SAME' pads the input; the output has the same height/width as the
# input only when strides == 1.

# Convolve the 6x6 input: floor((6 - 3) / 2) + 1 = 2.
x = tf.random.normal([1, 6, 6, 1])
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out.shape  # TensorShape([1, 2, 2, 1])

# Convolve the 5x5 input: floor((5 - 3) / 2) + 1 = 2, the same 2x2 output.
x = tf.random.normal([1, 5, 5, 1])
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out.shape  # TensorShape([1, 2, 2, 1])

# Only the output size has to be specified; TensorFlow deduces the number of
# rows and columns to fill in.
xx = tf.nn.conv2d_transpose(out, w, strides=2, padding='VALID', output_shape=[1, 6, 6, 1])
xx.shape  # TensorShape([1, 6, 6, 1])
xx = tf.nn.conv2d_transpose(out, w, strides=2, padding='VALID', output_shape=[1, 5, 5, 1])
xx.shape  # TensorShape([1, 5, 5, 1])
```     ```# Create X matrix
# Demo: run an ordinary convolution, then feed its output into a transposed
# convolution. The transposed convolution restores the input *size*, not the
# input *values*.

# Build a 5x5 input holding 1..25 in the [batch, height, width, channels] layout.
x = tf.range(25) + 1
x = tf.reshape(x, [1, 5, 5, 1])
x = tf.cast(x, tf.float32)
# Convolution kernel with fixed content, expanded to
# [k, k, input channels, output channels].
w = tf.constant([[-1, 2, -3.], [4, -5, 6], [-7, 8, -9]])
w = tf.expand_dims(w, axis=2)
w = tf.expand_dims(w, axis=3)
# Ordinary convolution: 5x5 input, 3x3 kernel, stride 2, no padding -> 2x2.
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out

# Use the output of the ordinary convolution as the input of the transposed
# convolution.
xx = tf.nn.conv2d_transpose(out, w, strides=2, padding='VALID', output_shape=[1, 5, 5, 1])
# Drop the batch and channel dimensions so the 5x5 result is easy to read.
tf.squeeze(xx)
#<tf.Tensor: id=117, shape=(5, 5), dtype=float32, numpy=
#array([[   67.,  -134.,   278.,  -154.,   231.],
#       [ -268.,   335.,  -710.,   385.,  -462.],
#       [  586.,  -770.,  1620.,  -870.,  1074.],
#       [ -468.,   585., -1210.,   635.,  -762.],
#       [  819.,  -936.,  1942., -1016.,  1143.]], dtype=float32)>

# Convolve a random 6x6 input; the output is again 2x2.
x = tf.random.normal([1, 6, 6, 1])
out = tf.nn.conv2d(x, w, strides=2, padding='VALID')
out
#<tf.Tensor: id=21, shape=(1, 2, 2, 1), dtype=float32, numpy=
#array([[[[ 20.438847 ],
#         [ 19.160788 ]],
#        [[  0.8098897],
#         [-28.30303  ]]]], dtype=float32)>

# Restore the 6x6 size by requesting it through output_shape.
xx = tf.nn.conv2d_transpose(out, w, strides=2, padding='VALID', output_shape=[1, 6, 6, 1])
xx

# Create a transposed convolution layer object; unlike the functional call
# above it owns its own kernel.
# NOTE(review): the layer construction was missing from the snippet; the
# hyper-parameters below mirror the functional calls above -- confirm.
layer = tf.keras.layers.Conv2DTranspose(1, kernel_size=3, strides=2, padding='VALID')
xx2 = layer(out)
xx2
```

# Keras depth separable convolution  # Depth separable convolution  # Inception ```import  os
import  tensorflow as tf
import  numpy as np
from    tensorflow import keras

# Seed both random number generators with the same fixed value.
np.random.seed(22)
tf.random.set_seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')

# Load MNIST and scale the pixel values by 1/255 as float32.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.
# Append a trailing channel axis: [b, 28, 28] => [b, 28, 28, 1]
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

# Wrap both splits as datasets yielding batches of 256 (image, label) pairs.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.batch(256)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(256)

print(x_train.shape, y_train.shape) #(60000, 28, 28, 1) (60000,)
print(x_test.shape, y_test.shape) #(10000, 28, 28, 1) (10000,)

class ConvBNRelu(keras.Model):
    """Basic unit: Conv2D followed by BatchNormalization and ReLU.

    Args:
        ch: Number of convolution filters (output channels).
        kernelsz: Side length of the square convolution kernel.
        strides: Stride of the convolution.
        padding: Padding mode passed to ``Conv2D``.
    """

    def __init__(self, ch, kernelsz=3, strides=1, padding='same'):
        super(ConvBNRelu, self).__init__()
        self.model = keras.models.Sequential([
            # The convolution itself; without it none of the constructor
            # arguments would have any effect.
            keras.layers.Conv2D(ch, kernelsz, strides=strides, padding=padding),
            keras.layers.BatchNormalization(),
            keras.layers.ReLU(),
        ])

    def call(self, x, training=None):
        """Apply the stack to ``x``; ``training`` is forwarded to the sub-model."""
        return self.model(x, training=training)

class InceptionBlk(keras.Model):
    """Inception block: four parallel branches concatenated on the channel axis.

    Every branch produces ``ch`` channels, so the block outputs ``4 * ch``
    channels.

    Args:
        ch: Number of output channels of each branch.
        strides: Stride applied once in every branch so that all branch
            outputs share the same height and width.
    """

    def __init__(self, ch, strides=1):
        super(InceptionBlk, self).__init__()
        self.ch = ch
        self.strides = strides
        self.conv1 = ConvBNRelu(ch, strides=strides)
        self.conv2 = ConvBNRelu(ch, kernelsz=3, strides=strides)
        self.conv3_1 = ConvBNRelu(ch, kernelsz=3, strides=strides)
        self.conv3_2 = ConvBNRelu(ch, kernelsz=3, strides=1)
        # Size-preserving pooling for the fourth branch (stride 1, 'same'
        # padding); call() uses self.pool, so it must be defined here.
        self.pool = keras.layers.MaxPooling2D(3, strides=1, padding='same')
        self.pool_conv = ConvBNRelu(ch, strides=strides)

    def call(self, x, training=None):
        """Run all four branches on ``x`` and concatenate their outputs."""
        x1 = self.conv1(x, training=training)
        x2 = self.conv2(x, training=training)
        x3_1 = self.conv3_1(x, training=training)
        x3_2 = self.conv3_2(x3_1, training=training)
        x4 = self.pool(x)
        x4 = self.pool_conv(x4, training=training)
        # Concatenate along axis 3 only, i.e. the channel dimension.
        x = tf.concat([x1, x2, x3_2, x4], axis=3)
        return x

class Inception(keras.Model):
    """Classifier made of a stem convolution, stacked Inception blocks,
    global average pooling and a dense output layer.

    Args:
        num_layers: Number of block groups; each group holds two
            ``InceptionBlk`` instances (stride 2, then stride 1).
        num_classes: Number of units of the final dense layer.
        init_ch: Channel count of the stem and per-branch channel count of
            the first group.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, num_layers, num_classes, init_ch=16, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.in_channels = init_ch
        # Per-branch channel count of the current group; doubled after every
        # group, while the first block of each group uses stride 2.
        self.out_channels = init_ch
        self.num_layers = num_layers
        self.init_ch = init_ch
        self.conv1 = ConvBNRelu(init_ch)
        self.blocks = keras.models.Sequential(name='dynamic-blocks')

        for block_id in range(num_layers):
            for layer_id in range(2):
                if layer_id == 0:
                    # The first block of each group uses stride 2.
                    block = InceptionBlk(self.out_channels, strides=2)
                else:
                    block = InceptionBlk(self.out_channels, strides=1)
                # Register the block; otherwise self.blocks stays empty and
                # the created blocks are never part of the model.
                self.blocks.add(block)
            # Enlarge out_channels once per group.
            self.out_channels *= 2

        self.avg_pool = keras.layers.GlobalAveragePooling2D()
        self.fc = keras.layers.Dense(num_classes)

    def call(self, x, training=None):
        """Return the dense-layer outputs (used as logits by the loss) for ``x``."""
        out = self.conv1(x, training=training)
        out = self.blocks(out, training=training)
        out = self.avg_pool(out)
        out = self.fc(out)
        return out

# Hyper-parameters and model construction.
batch_size = 32
epochs = 100
model = Inception(num_layers=2, num_classes=10)
# Building with a known input shape lets every layer derive its own shapes,
# which summary() needs in order to report parameter counts.
mnist_input_shape = (None, 28, 28, 1)
model.build(input_shape=mnist_input_shape)
model.summary()
#Model: "inception"
#_________________________________________________________________
#Layer (type)                 Output Shape              Param #
#=================================================================
#conv_bn_relu (ConvBNRelu)    multiple                  224
#_________________________________________________________________
#dynamic-blocks (Sequential)  multiple                  292704
#_________________________________________________________________
#global_average_pooling2d (Gl multiple                  0
#_________________________________________________________________
#dense (Dense)                multiple                  1290
#=================================================================
#Total params: 294,218
#Trainable params: 293,226
#Non-trainable params: 992
#_________________________________________________________________

# Optimizer, loss and metric. The loss takes raw logits (from_logits=True),
# so the model does not need a softmax output layer.
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
criteon = keras.losses.CategoricalCrossentropy(from_logits=True)
acc_meter = keras.metrics.Accuracy()

for epoch in range(epochs):
    # ---- training ----
    for step, (x, y) in enumerate(db_train):
        # Record the forward pass so gradients can be computed afterwards.
        with tf.GradientTape() as tape:
            # [b, 10]; training=True so BatchNormalization uses batch statistics.
            logits = model(x, training=True)
            # [b] vs [b, 10]: one-hot encode the labels to match the logits.
            loss = criteon(tf.one_hot(y, depth=10), logits)
        # Without this update step the weights would never change.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 10 == 0:
            print(epoch, step, 'loss:', loss.numpy())

    # ---- evaluation after every epoch ----
    acc_meter.reset_states()  # Clear the accuracy metric
    for x, y in db_test:
        # [b, 10]
        logits = model(x, training=False)
        # [b, 10] => [b]
        pred = tf.argmax(logits, axis=1)
        print(x.shape, y.shape, pred.shape) #(256, 28, 28, 1) (256,) (256,)
        # Compare the label batch [b] with the prediction batch [b].
        acc_meter.update_state(y, pred)
    print(epoch, 'evaluation acc:', acc_meter.result().numpy())

```  230 original articles published, praised by 111, visited 160000+

Keywords: network Lambda Python

Added by graham23s on Sun, 26 Jan 2020 11:25:32 +0200