Boston house price prediction rewritten with PaddlePaddle

Dataset: the UCI Boston Housing dataset (housing.data)

# Load PaddlePaddle, NumPy and related libraries
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import numpy as np
import os
import random

Dynamic graph mode (imperative programming paradigm, analogous to Python): an interpretive execution mode. Users do not need to define the complete network structure in advance; each line of network code is executed immediately and its result is available at once.
Static graph mode (declarative programming paradigm, analogous to C++): a compile-then-execute mode. The user must define the complete network structure in advance; the framework then compiles and optimizes the structure before executing it to obtain the results.
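
A minimal sketch of dynamic-graph (eager) execution in PaddlePaddle 2.x: each statement runs immediately and its result can be inspected on the spot, with no separate compile step.

import paddle

a = paddle.to_tensor([1.0, 2.0])
b = paddle.to_tensor([3.0, 4.0])
print(a + b)  # the result is computed and available immediately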

def load_data():
    # Import data from file (adjust this path to wherever housing.data is stored locally)
    datafile = 'D:/Browser download/housing.data'
    data = np.fromfile(datafile, sep=' ', dtype=np.float32)

    # Each data includes 14 items, of which the first 13 items are influencing factors and the 14th item is the corresponding median house price
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)

    # Reshape the original data into a shape like [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Split the original data set into training set and test set
    # Here, 80% of the data are used for training and 20% for testing
    # The test set and training set must have no intersection
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]

    # Calculate the maximum, minimum and average values of the training set
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                                 training_data.sum(axis=0) / training_data.shape[0]
    
    # Record the normalization parameters of the data and normalize the data during prediction
    global max_values
    global min_values
    global avg_values
    max_values = maximums
    min_values = minimums
    avg_values = avgs

    # Normalize the data
    for i in range(feature_num):
        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])

    # Split the normalized data into training and test sets
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data
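
As a quick sanity check on the loader: the standard housing.data file contains 506 samples of 14 values each, so with the 80/20 split the shapes below are expected (a sketch assuming that standard file).

training_data, test_data = load_data()
print(training_data.shape)  # (404, 14) with the standard 506-sample file
print(test_data.shape)      # (102, 14)
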
class Regressor(paddle.nn.Layer):

    # self represents the instance of the class itself
    def __init__(self):
        # Initialize some parameters in the parent class
        super(Regressor, self).__init__()
        
        # Define a fully connected layer. The input dimension is 13 and the output dimension is 1
        self.fc = Linear(in_features=13, out_features=1)
    
    # Forward pass of the network; x is the predicted house price
    def forward(self, inputs):
        x = self.fc(inputs)
        return x
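
A quick forward-pass check on the layer definition (a sketch using random input, not part of the training flow):

m = Regressor()
dummy = paddle.randn([4, 13], dtype='float32')  # a random batch of 4 samples
print(m(dummy).shape)  # [4, 1]: one predicted price per sample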

Declare an instance of the Regressor model defined above and set the model's state to training.
Use the load_data function to load the training data and test data.
Set the optimization algorithm and learning rate: stochastic gradient descent (SGD) with a learning rate of 0.01.

# Declare a well-defined linear regression model
model = Regressor()
# Switch the model to training mode
model.train()
# Load data
training_data, test_data = load_data()
# Define the optimization algorithm: stochastic gradient descent (SGD)
# The learning rate is set to 0.01
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
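
SGD is the simplest choice here; any optimizer under paddle.optimizer can be swapped in through the same interface. As a sketch (not used in this tutorial), Adam would be configured like this:

# An alternative optimizer, shown only for comparison; the tutorial keeps SGD
opt_adam = paddle.optimizer.Adam(learning_rate=0.01, parameters=model.parameters())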

The model instance has two states: training state train() and evaluation state eval(). Training requires both the forward pass and back-propagation of gradients, while prediction only needs the forward pass, so the running state must be specified on the model explicitly.
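
In evaluation state it is also common to wrap the forward pass in paddle.no_grad(), which skips gradient bookkeeping entirely; a minimal sketch:

model.eval()
with paddle.no_grad():  # no gradients are tracked inside this block
    sample = paddle.randn([1, 13], dtype='float32')  # stand-in input
    print(model(sample).numpy())
model.train()           # switch back before the training loop below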

EPOCH_NUM = 10   # Set the number of epochs (outer loop iterations)
BATCH_SIZE = 10  # Set batch size

# Define outer loop
for epoch_id in range(EPOCH_NUM):
    # Before each epoch, randomly shuffle the order of the training data
    np.random.shuffle(training_data)
    # Split the training data, and each batch contains 10 pieces of data
    mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)]
    # Define inner loop
    for iter_id, mini_batch in enumerate(mini_batches):
        x = np.array(mini_batch[:, :-1]) # Get training data of current batch
        y = np.array(mini_batch[:, -1:]) # Get the training label of the current batch (real house price)
        # Convert the numpy data into PaddlePaddle dynamic-graph tensors
        house_features = paddle.to_tensor(x)
        prices = paddle.to_tensor(y)
        
        # Forward calculation
        predicts = model(house_features)
        
        # Calculate loss
        loss = F.square_error_cost(predicts, label=prices)
        avg_loss = paddle.mean(loss)
        if iter_id % 20 == 0:
            print("epoch: {}, iter: {}, loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))
        
        # Back propagation
        avg_loss.backward()
        # Minimize loss and update parameters
        opt.step()
        # Clear gradient
        opt.clear_grad()
epoch: 0, iter: 0, loss is: [0.1494867]
epoch: 0, iter: 20, loss is: [0.08427192]
epoch: 0, iter: 40, loss is: [0.07899499]
epoch: 1, iter: 0, loss is: [0.04854598]
epoch: 1, iter: 20, loss is: [0.10935007]
epoch: 1, iter: 40, loss is: [0.06932441]
epoch: 2, iter: 0, loss is: [0.09248729]
epoch: 2, iter: 20, loss is: [0.09958001]
epoch: 2, iter: 40, loss is: [0.18924591]
epoch: 3, iter: 0, loss is: [0.03678835]
epoch: 3, iter: 20, loss is: [0.05555127]
epoch: 3, iter: 40, loss is: [0.09124018]
epoch: 4, iter: 0, loss is: [0.02770482]
epoch: 4, iter: 20, loss is: [0.07265414]
epoch: 4, iter: 40, loss is: [0.02839946]
epoch: 5, iter: 0, loss is: [0.05219327]
epoch: 5, iter: 20, loss is: [0.03690437]
epoch: 5, iter: 40, loss is: [0.03987304]
epoch: 6, iter: 0, loss is: [0.02996459]
epoch: 6, iter: 20, loss is: [0.08583923]
epoch: 6, iter: 40, loss is: [0.04165906]
epoch: 7, iter: 0, loss is: [0.04495484]
epoch: 7, iter: 20, loss is: [0.06099489]
epoch: 7, iter: 40, loss is: [0.08534448]
epoch: 8, iter: 0, loss is: [0.04902298]
epoch: 8, iter: 20, loss is: [0.06693803]
epoch: 8, iter: 40, loss is: [0.03054288]
epoch: 9, iter: 0, loss is: [0.02795858]
epoch: 9, iter: 20, loss is: [0.09886132]
epoch: 9, iter: 40, loss is: [0.01993782]

Data preparation: convert a batch of data into np.array format, and then into Paddle's built-in tensor format.
Forward pass: feed the batch of sample data into the network and compute the output results.
Loss computation: using the forward results and the real house prices as inputs, compute the loss value with the square_error_cost API. All PaddlePaddle APIs come with complete descriptions and examples; how to consult the API reference is introduced in detail in later tutorials.
Back-propagation: call the backward function to compute the gradients of each layer from back to front, and update the parameters according to the configured optimization algorithm.
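
To make the loss concrete: square_error_cost returns the element-wise squared difference between prediction and label, and paddle.mean averages it into a scalar. A small worked check:

p = paddle.to_tensor([[2.0], [4.0]])  # predictions
t = paddle.to_tensor([[1.0], [1.0]])  # labels
print(F.square_error_cost(p, t).numpy())               # [[1.], [9.]]
print(paddle.mean(F.square_error_cost(p, t)).numpy())  # [5.]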

# Save the model parameters with the file name LR_model.pdparams
paddle.save(model.state_dict(), 'LR_model.pdparams')
print("The model is saved successfully, and the model parameters are saved in LR_model.pdparams in")
The model is saved successfully, and the model parameters are saved in LR_model.pdparams in
print(model.state_dict())
OrderedDict([('fc.weight', Parameter containing:
Tensor(shape=[13, 1], dtype=float32, place=CPUPlace, stop_gradient=False,
       [[-0.31492886],
        [-0.00053923],
        [-0.58833241],
        [-0.25711799],
        [ 0.05277566],
        [-0.25464550],
        [ 0.35711023],
        [-0.12961355],
        [ 0.13506889],
        [ 0.01279456],
        [ 0.07082576],
        [ 0.08301265],
        [-0.71715754]])), ('fc.bias', Parameter containing:
Tensor(shape=[1], dtype=float32, place=CPUPlace, stop_gradient=False,
       [-0.00065335]))])
def load_one_example():
    # Randomly select one from the loaded test set above as the test data
    idx = np.random.randint(0, test_data.shape[0])
    idx = -10  # override the random index with a fixed one so the result is reproducible
    one_data, label = test_data[idx, :-1], test_data[idx, -1]
    # Reshape the data to [1, 13]
    one_data = one_data.reshape([1, -1])

    return one_data, label

Configure the machine resources used for prediction. This case runs on the local machine by default, so no code is needed to specify a device.
Load the trained model parameters into the model instance: the first statement reads the parameters from the file, and the second loads them into the model. After loading, set the model's state to eval(). As mentioned above, a model in the training state must support both the forward pass and gradient back-propagation, which makes its implementation more involved, while a model in the validation or prediction state only needs the forward pass, so its implementation is simpler and performs better.
Feed the sample features to be predicted into the model and print the prediction results.

# The parameter is the file address where the model parameters are saved
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
model.eval()

# Pick one sample from the test set loaded earlier
one_data, label = load_one_example()
# Convert the data to the dynamic-graph tensor format
one_data = paddle.to_tensor(one_data)
predict = model(one_data)

# Inverse-normalize the result: features were scaled as (x - avg) / (max - min), so map back with x * (max - min) + avg using the MEDV statistics
predict = predict * (max_values[-1] - min_values[-1]) + avg_values[-1]
# Inverse normalization of label data
label = label * (max_values[-1] - min_values[-1]) + avg_values[-1]

print("Inference result is {}, the corresponding label is {}".format(predict.numpy(), label))
Inference result is [[21.392113]], the corresponding label is 19.700000762939453

