Simple handwritten digit recognition

Personal notes; thanks for reading.

Handwritten digit recognition

Reference link:

1. Import packages

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

2. Import data set

mnist = input_data.read_data_sets('MNIST_data',one_hot=True)  

#The read_data_sets function downloads the MNIST data set. Note one_hot=True: each label is not a single value but a one-hot vector
#The data set is divided into three parts: train, validation and test

1. mnist.train.num_examples returns the number of samples in the train set

2. mnist.validation.num_examples returns the number of samples in the validation set

3. mnist.test.num_examples returns the number of samples in the test set

4. mnist.train.images returns the pixel values of all pictures in the train set

5. mnist.train.labels returns the labels of all pictures in the train set

6. mnist.train.next_batch() feeds data into the neural network, as sketched below
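
A minimal sketch of these accessors, assuming the data set loaded in step 2 (the printed sizes are for the default MNIST split):

print(mnist.train.num_examples)        #55000 training samples
print(mnist.validation.num_examples)   #5000 validation samples
print(mnist.test.num_examples)         #10000 test samples
print(mnist.train.images.shape)        #(55000, 784): each image is a flattened 28x28 picture
print(mnist.train.labels.shape)        #(55000, 10): each label is a one-hot vector
batch_xs, batch_ys = mnist.train.next_batch(100)   #fetch the next 100 samples
print(batch_xs.shape, batch_ys.shape)  #(100, 784) (100, 10)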

3. Define the batch size

batch_size = 100

4. Calculate the number of batches

n_batch = mnist.train.num_examples // batch_size

5. Define the summary function of variables

def variables_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var) #reduce_mean() averages over all dimensions by default; see the function definition for the axis options
        tf.summary.scalar('mean',mean) #Log a scalar so it shows up in TensorBoard
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var-mean))) #Calculate the standard deviation
        tf.summary.scalar('stddev',stddev) #Log the standard deviation
        tf.summary.scalar('max',tf.reduce_max(var)) #Log the maximum value
        tf.summary.scalar('min',tf.reduce_min(var)) #Log the minimum value
        tf.summary.histogram('histogram',var) #A whole tensor cannot be logged as a scalar, so use a histogram summary
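
The function above is defined but never called later in these notes; a minimal sketch of how it could be attached to a variable (W_demo is a hypothetical name):

W_demo = tf.Variable(tf.truncated_normal([784,10],stddev=0.1),name='W_demo')  #hypothetical weight tensor
variables_summaries(W_demo)  #its mean/stddev/max/min/histogram will appear in TensorBoard after merge_all() in step 22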

6. Define the weight initialization function

def weight_variable(shape,name):
    initial = tf.truncated_normal(shape,stddev=0.1)  #A tensor of the given shape drawn from a truncated normal distribution
    return tf.Variable(initial,name=name)    #Wrap the initial value in a Variable, whose value can then be changed during training

#tf.truncated_normal() function: the generated values follow a normal distribution with the specified mean and standard deviation, except that values more than 2 standard deviations from the mean are dropped and re-picked
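
A minimal sketch checking that bound, assuming TensorFlow 1.x (the sketch runs its own throwaway session):

with tf.Session() as demo_sess:
    samples =[100000],mean=0.0,stddev=0.1))
    print(samples.min(), samples.max())  #both lie within +/-0.2, i.e. 2 standard deviations of the mean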

7. Define the bias initialization function

def bias_variable(shape,name):
    initial = tf.truncated_normal(shape=shape,stddev=0.1)  #Here the bias is also initialized from a truncated normal distribution
    return tf.Variable(initial,name=name)

8. Define convolution function

def conv2d(x,W):
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')  #stride 1 in every dimension; 'SAME' padding keeps the spatial size

#Note: tf.nn.conv2d has no explicit kernel-size argument; the kernel size is set by the shape used to initialize the convolution weight W

9. Define the pooling layer

def max_pool_2X2(x):
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')  #2x2 window with stride 2: halves height and width

#tf.nn.max_pool(value, ksize, strides, padding, name=None) function parameters:

value: the input to be pooled. The pooling layer usually follows a convolution layer, so the input is typically a feature map, still with a shape like [batch, height, width, channels]

ksize: the size of the pooling window, a four-dimensional vector, usually [1, height, width, 1]. Because we do not want to pool over the batch and channels dimensions, those two dimensions are set to 1

strides: as with convolution, the step length of the sliding window in each dimension, generally [1, stride, stride, 1]

padding: as with convolution, either 'VALID' or 'SAME'. The function returns a Tensor of the same type, whose shape is still of the form [batch, height, width, channels]
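
A minimal shape check for the two helper functions above, assuming TensorFlow 1.x; demo_x and demo_W are hypothetical names:

demo_x = tf.placeholder(tf.float32,[None,28,28,1])                     #hypothetical input batch
demo_conv = conv2d(demo_x,weight_variable([5,5,1,32],name='demo_W'))   #5x5 kernel, 1 input channel, 32 output channels
print(demo_conv.shape)                #(?, 28, 28, 32): 'SAME' padding keeps the height and width
print(max_pool_2X2(demo_conv).shape)  #(?, 14, 14, 32): the 2x2 window with stride 2 halves them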

10. Define input

with tf.name_scope('Input'):
    x = tf.placeholder(tf.float32,[None,784],name='x_input')  #None leaves the batch dimension open; it is determined by whatever x is actually fed in
    y = tf.placeholder(tf.float32, [None, 10], name='y_input')
    with tf.name_scope('x_image'):
        x_image = tf.reshape(x,[-1,28,28,1])  #-1 tells reshape to infer this dimension automatically from the total number of elements
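
A minimal sketch of how -1 is inferred, with illustrative values:

flat = tf.zeros([100,784])            #a hypothetical batch of 100 flattened images
imgs = tf.reshape(flat,[-1,28,28,1])
print(imgs.shape)                     #(100, 28, 28, 1): -1 was inferred as 100*784/(28*28*1) = 100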

11. Initialize the convolution kernel weights

with tf.name_scope('Conv1'):
    with tf.name_scope('W_conv1'):
        W_conv1 = weight_variable([5,5,1,32],name='W_conv1')   #[5,5,1,32]: a 5x5 kernel; 1 is the number of input channels, 32 the number of output channels

12. Initialize the bias

    with tf.name_scope('b_conv1'):
        bias_conv1 = bias_variable([32],name='b_conv1')

13. Convolve x_image with the kernel and add the bias

    with tf.name_scope('conv2d_1'):
        conv2d_1 = conv2d(x_image,W_conv1) + bias_conv1
    with tf.name_scope('relu'):
        h_conv1 = tf.nn.relu(conv2d_1)
    with tf.name_scope('h_pool1'):
        h_pool1 = max_pool_2X2(h_conv1)

14. The second convolution layer

with tf.name_scope('Conv2'):
    with tf.name_scope('W_conv2'):
        W_conv2 = weight_variable([5,5,32,64],name='W_conv2')
    with tf.name_scope('b_conv2'):
        bias_conv2 = bias_variable([64],name='b_conv2')
    with tf.name_scope('conv2d_2'):
        conv2d_2 = conv2d(h_pool1,W_conv2) + bias_conv2
    with tf.name_scope('relu'):
        h_conv2 = tf.nn.relu(conv2d_2)
    with tf.name_scope('h_pool2'):
        h_pool2 = max_pool_2X2(h_conv2)

15. Fully connected layer 1

with tf.name_scope('fc1'):
    with tf.name_scope('W_fc1'):
        W_fc1 = weight_variable([7*7*64,1024],name='W_fc1')
    with tf.name_scope('b_fc1'):
        bias_fc1 = bias_variable([1024],name='b_fc1')
    with tf.name_scope('h_pool2_flat'):
        h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*64],name='h_pool2_flat')    #Flatten the output of the previous layer into the input of the current layer
    with tf.name_scope('wx_plus_b1'):
        wx_plus_b1 = tf.matmul(h_pool2_flat,W_fc1) + bias_fc1
    with tf.name_scope('relu'):
        h_fc1 = tf.nn.relu(wx_plus_b1)

#Note: the output of the convolution layer must be flattened before the fully connected layer, because the convolution layer outputs multi-channel two-dimensional (or higher-dimensional) data, while the fully connected layer only accepts two-dimensional data, i.e. each neuron in the fully connected layer takes a one-dimensional vector
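
The 7*7*64 above follows from the two pooling layers; a quick check of the arithmetic on the tensors already defined:

#28x28 input -> Conv1 ('SAME' keeps 28x28) -> pool1 halves it to 14x14 -> Conv2 ('SAME') -> pool2 halves it to 7x7
#with 64 output channels from Conv2, each sample therefore flattens to 7*7*64 = 3136 values
print(h_pool2.shape)        #(?, 7, 7, 64)
print(h_pool2_flat.shape)   #(?, 3136)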

16. Dropout layer

    with tf.name_scope('keep_prob'):
        keep_prob = tf.placeholder(tf.float32,name='keep_prob')
    with tf.name_scope('h_fc1_drop'):
        h_fc1_drop = tf.nn.dropout(h_fc1,keep_prob,name='h_fc1_drop')

#tf.nn.dropout() function: the output has the same shape as the input; during training each element is kept with probability keep_prob, and the kept elements are scaled up by 1/keep_prob
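
A minimal sketch of that behaviour, assuming TensorFlow 1.x (a throwaway session, not part of the training graph):

with tf.Session() as demo_sess:
    kept =[1,10]),keep_prob=0.5))
    print(kept)  #roughly half the entries are 0; the survivors are scaled to 1/0.5 = 2.0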

17. Fully connected layer 2

    with tf.name_scope('fc2'):
        with tf.name_scope('W_fc2'):
            W_fc2 = weight_variable([1024,10],name='W_fc2')
        with tf.name_scope('bias_fc2'):
            bias_fc2 = bias_variable([10],name='bias_fc2')
        with tf.name_scope('wx_plus_b2'):
            wx_plus_b2 = tf.matmul(h_fc1_drop,W_fc2) + bias_fc2

18. Softmax layer

        with tf.name_scope('softmax'):
            prediction = tf.nn.softmax(wx_plus_b2)

#tf.nn.softmax() function: the shape of the input and output is the same, except that each element becomes a probability value and all elements sum to 1
Note: optimizing the neural network means pushing the network's final output for each sample towards that sample's label, so the label must be a numeric value or a tensor
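
A minimal sketch of that property, assuming TensorFlow 1.x:

with tf.Session() as demo_sess:
    probs =[2.0,1.0,0.1]))
    print(probs, probs.sum())  #approximately [0.659 0.242 0.099]; the sum is 1.0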

19. Compute the cost function

#Define cost function
    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=wx_plus_b2),name='cross_entropy_')  #Pass the raw logits wx_plus_b2, not the softmax output, to avoid applying softmax twice

#tf.nn.softmax_cross_entropy_with_logits(labels, logits) is the cross entropy function; it applies softmax to logits itself, which is why the un-softmaxed wx_plus_b2 is passed in above
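
A minimal numeric sketch of the cross entropy, assuming TensorFlow 1.x; the logits and label values are illustrative:

with tf.Session() as demo_sess:
    demo_logits = tf.constant([[2.0,1.0,0.1]])
    demo_labels = tf.constant([[1.0,0.0,0.0]])   #one-hot label for class 0
    ce = tf.nn.softmax_cross_entropy_with_logits(labels=demo_labels,logits=demo_logits)
    print(  #about [0.417], i.e. -log(softmax(logits)[0])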

20. Optimize with the Adam optimizer

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

21. Accuracy

    with tf.name_scope('accuracy'):
        #Put the result in a Boolean list
        correct_prediction = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))

        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))  #tf.cast converts the dtype, here from bool to float32 so that the mean can be computed

#tf.argmax(prediction,1) function: the parameter axis=1 reduces along the second dimension, i.e. it finds the maximum of each row and returns the index of that row maximum
#tf.equal(x,y) function: checks element-wise whether x and y are equal and returns a list of Boolean values
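
A minimal sketch of the whole accuracy computation on toy values, assuming TensorFlow 1.x; the names are hypothetical:

with tf.Session() as demo_sess:
    demo_pred = tf.constant([[0.1,0.9],[0.8,0.2]])    #hypothetical network outputs
    demo_truth = tf.constant([[0.0,1.0],[0.0,1.0]])   #one-hot ground truth labels
    correct = tf.equal(tf.argmax(demo_pred,1),tf.argmax(demo_truth,1))  #[True, False]
    print(,tf.float32))))          #0.5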

22. Merge all summaries

merged = tf.summary.merge_all()


23. Train the model in a session

with tf.Session() as sess:  #Initialize all variables before training
    train_writer = tf.summary.FileWriter('logs/train',sess.graph)  #Write sess.graph to a file for TensorBoard
    test_writer = tf.summary.FileWriter('logs/test',sess.graph)
    for i in range(1001):
        #Train the model on the next training batch, with dropout active
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)  #Return the next batch_size examples from the train set,feed_dict={x:batch_xs,y:batch_ys,keep_prob:0.5})
        #Record the summaries computed on the training batch
        summary =,feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
        train_writer.add_summary(summary,i)
        #Record the summaries computed on a test batch
        batch_xs, batch_ys = mnist.test.next_batch(batch_size)
        summary =,feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
        test_writer.add_summary(summary,i)

        if i%100 == 0:
            test_acc =,feed_dict={x:mnist.test.images,y:mnist.test.labels,keep_prob:1.0})
            train_acc =,feed_dict={x:mnist.train.images[:10000],y:mnist.train.labels[:10000],keep_prob:1.0})
            print('Iter ' + str(i) + ', Testing Accuracy = ' + str(test_acc) + ', Training Accuracy = ' + str(train_acc))

24. Steps to build a network

1. Initialize the input
2. Initialize the weight and bias parameters
3. Build each layer
4. Define the cost function
5. Optimize the cost function
6. Write the Session
