1. Decision framework (class outline)
import numpy as np
import pandas as pd


class QLearningTable:
    """Outline of a tabular Q-learning agent.

    This is the skeleton only: each method body is intentionally left
    empty (docstring only) so that the overall structure is visible at a
    glance.
    """

    # Initialization
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        """Store the action list and hyper-parameters and create the Q table."""

    # Selection behavior
    def choose_action(self, observation):
        """Pick an action for the state ``observation``."""

    # Learn to update parameters
    def learn(self, s, a, r, s_):
        """Update the Q table from the transition ``(s, a, r, s_)``."""

    # Check whether state exists
    def check_state_exist(self, state):
        """Make sure ``state`` has a row in the Q table."""
2. Function implementation
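1. Initialization
- actions: the list of all available actions
- epsilon: greedy rate (the probability of picking the action with the highest Q value)
- lr: learning rate α
- gamma: reward decay (discount factor γ)
- q_table: the Q table (one column per action; one row is added for each state as it is encountered)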
def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
    """Store the hyper-parameters and create an empty Q table.

    Args:
        actions: List of all available actions; used as the Q table columns.
        learning_rate: Step size applied to each Q-value update.
        reward_decay: Discount factor applied to the next state's best Q value.
        e_greedy: Probability of choosing the highest-valued action.
    """
    self.actions = actions  # a list
    self.lr = learning_rate  # learning rate
    self.gamma = reward_decay  # reward decay
    self.epsilon = e_greedy  # greediness
    # Initial Q table: one float column per action and no rows yet.
    self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
2. Choose action
- if: with probability epsilon, select the action with the highest Q value (when several actions share the highest value, one of them is picked at random so the choice is not biased by column order)
- else: select an action at random
def choose_action(self, observation):
    """Choose an action for ``observation`` with an epsilon-greedy rule.

    Args:
        observation: The current state; used as a row label of the Q table.

    Returns:
        One of ``self.actions``.
    """
    # Make sure the state has a row in the Q table before reading it.
    self.check_state_exist(observation)

    if np.random.uniform() < self.epsilon:
        # Exploit: take the action with the highest Q value.
        state_action = self.q_table.loc[observation, :]
        # Several actions may share the highest value; choose among them
        # at random instead of always taking the first column.
        best_actions = state_action[state_action == state_action.max()].index
        action = np.random.choice(best_actions)
    else:
        # Explore: choose any action at random.
        action = np.random.choice(self.actions)
    return action
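3. Learn: update parameters (update the Q table). The update rule is $Q(s,a) \leftarrow Q(s,a) + \alpha\,\bigl[r + \gamma \max_{a'} Q(s',a') - Q(s,a)\bigr]$; when the next state $s'$ is terminal, the target is just $r$.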
def learn(self, s, a, r, s_):
    """Update the Q value of ``(s, a)`` from one observed transition.

    Args:
        s: The state in which the action was taken (row label).
        a: The action that was taken (column label).
        r: The reward received.
        s_: The next state, or the string ``'terminal'`` if the episode ended.
    """
    # Make sure the next state has a row in the Q table.
    self.check_state_exist(s_)

    q_predict = self.q_table.loc[s, a]  # current estimate
    if s_ != 'terminal':
        # Next state is not terminal: bootstrap from its best Q value.
        q_target = r + self.gamma * self.q_table.loc[s_, :].max()
    else:
        # Next state is terminal: there is no future value to add.
        q_target = r

    # Move the stored value towards the target by the learning rate.
    self.q_table.loc[s, a] += self.lr * (q_target - q_predict)
4. Check whether the current state already has action values in the Q table
If the state is not in the table yet, we insert a row of zeros as the initial values of all actions for this state.
def check_state_exist(self, state):
    """Add an all-zero row for ``state`` if the Q table does not have one.

    Args:
        state: The row label to look up in ``self.q_table``.
    """
    if state in self.q_table.index:
        return

    # DataFrame.append was removed in pandas 2.0, so build a one-row
    # frame and concatenate it instead. Float zeros keep the columns as
    # float64.
    zero_row = pd.DataFrame(
        [[0.0] * len(self.actions)],
        index=[state],
        columns=self.q_table.columns,
    )
    self.q_table = pd.concat([self.q_table, zero_row])