Getting following error in python: index 100 is out of bounds for axis 0 with size 100. Went through other solutions but I found it hard to understand

My agent's objective is to control the speed of the motor. Here all state values are the rpm value of the motor and actions are defined as 0(decrease rpm by 1),1(no change) and 2(increase rpm by 1). I am using Q learning.

class SpeedControlEnv(Env): #we can access gym env 
    def __init__(self): #initializing actions, observation, spaces,
        #Actions that we can take up, down, no change in speed
        self.action_space = Discrete(3)
        # Observation space to hold the current speed so our agent can take necessary action
        #self.observation_space = Box(low=np.array([0]), high=np.array([100])) #box is used for continuous state space
        self.observation_space = Discrete(100) #discrete observation space

        #set start temp
        self.state = 40 + random.randint(-30,40) #this is the start state for my agent.
        # set time for our agent to complete the task before my motor blows
        self.control_length = 60 # this is in seconds : So my agent has 'n' seconds to bring it in normal state 
    def step(self, action): #what actions agent can take in each steps
        #Take action (0,1,2)
        # 0 -1 = -1 #decreaase speed by 1
        #1 -1 = 0 #no change
        #2- 1 = 1 #increase speed by 1
        self.state += action -1
        #with each action reduce the time my agent has by 1
        self.control_length -= 1
        # assign reward
        if self.state >= 40 and self.state <= 45:
            reward = 1
            reward = -1
        # check if shower is done    
        if self.control_length <= 0:
            done = True
            done = False
        #apply random noise
        #self.state += random.randint(-3,3)
        #set placeholder for information, Required by OpenAI
        info = {}
        #return step information
        return self.state, reward, done , info
    def render(self): #visualization 
    def reset(self): #reset after training or a episode
        #Reset speed that is the state
        self.state = 40 + random.randint(-30,40)
        #Reset control time
        self.control_length = 60
        return self.state

#Defined my hyperparameters as #Initialize all the hyperparameters

num_episodes = 50000 #agent plays step
#max_steps_per_episode = 60 #max steps agent can take in one episode

learning_rate = 0.1 #alpha
discount_rate = 0.99 #gamma

exploration_rate = 1 #epsilon
max_exploration_rate = 1 #max epsilon
min_exploration_rate = 0.01 #min epsilon
exploration_decay_rate = 0.01 #decaying rate of exploration

#My Q learning code is as follow

for episode in range(num_episodes):
  state = env.reset()
  done = False
  reward_current_episode = 0
  for step in range(env.control_length):
    exploration_rate_threshold = random.uniform(0,1)
    if exploration_rate_threshold > exploration_rate:
      action = np.argmax(q_table[state,:])
      action = env.action_space.sample()

    new_state,reward,done,info = env.step(action)

    #Update Q table
    q_table[state,action] = q_table[state, action]*(1-learning_rate) + learning_rate*(reward + discount_rate*np.max(q_table[new_state,:]))

    state = new_state
    reward_current_episode += reward

    if done == True:
  exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate)*np.exp(-exploration_decay_rate*episode)

  #append rewards from current episode to the list of rewards achieved from all episode



IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_6020/ in <module>
     15     #Update Q table
---> 16     q_table[state,action] = q_table[state, action]*(1-learning_rate) + learning_rate*(reward + discount_rate*np.max(q_table[new_state,:]))
     18     state = new_state

IndexError: index 100 is out of bounds for axis 0 with size 100

It will be great if anyone can explain to me why I am getting this error. I am new to programming and machine learning.


  • It looks like you're trying to index a numpy array. Arrays, and just about everything in python and programming in general, are 0-indexed. That means their indices start at 0 instead of 1, which means that the maximum index in an array with 100 items is 99.