python reinforcement-learning openai-gym

OpenAI Integrating custom game into a gym environment

[Introduction] I'm a beginner with OpenAI, I have made a custom game into which I would like to implement a self-learning agent. I followed this guide to set up a repository on GitHub, however I do not understand how I could format my code to work with the contents of gym-foo/gym_foo/envs/foo_env.py

[Question] Is there any chance someone could guide me on how to structure my code to so it’s compatible with:

class FooEnv(gym.Env):
metadata = {'render.modes': ['human']}

def __init__(self):
  ...
def step(self, action):
  ...
def reset(self):
...
def render(self, mode='human', close=False):
  ...

[Code]

import pygame
import time
import random
from pygame.locals import *

pygame.init()

display_width = 1002
display_height = 720

black = (0,0,0)
white = (255,255,255)
red = (220,0,0)
blue = (53,155,255)
green = (0,190,0)
bright_red = (255,0,0)
bright_green = (0,255,0)
dark_blue = (0,102,204)
yellow = (255, 255, 0)

gameDisplay = pygame.display.set_mode((display_width,display_height)) #creates surface/ display
pygame.display.set_caption('Blob Arena') #name of project
clock = pygame.time.Clock() #sets a clock

#________________________________________________________________________________________

blobImage = pygame.image.load('blob2.png')
blobIcon = pygame.image.load('blob_img.png')
bulletpicture = pygame.image.load("bullet.png")
pygame.display.set_icon(blobIcon)
pause = True
blob_width = 51
blob_height = 51
bullet_width = 12
bullet_height = 5

bullets=[]
bullets2=[]

def blob(x,y):
    gameDisplay.blit(blobImage,(x,y)) #drawing to background

def bullets_hit(count):
    font = pygame.font.SysFont(None, 25)
    text = font.render("Score: "+str(count), True, black)
    gameDisplay.blit(text,(0,20))

def player_lives(count):
    font = pygame.font.SysFont(None, 25)
    text = font.render("Lives Left: "+str(count), True, bright_red)
    gameDisplay.blit(text,(0,0))

def game_intro():
    intro = True
    while intro:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        gameDisplay.fill(dark_blue)
        largeText = pygame.font.Font('freesansbold.ttf', 110)
        TextSurf, TextRect = text_objects("Blob Arena", largeText)  # Returns text surface and rectangle
        TextRect.center = ((display_width / 2), (display_height / 2.5))
        gameDisplay.blit(TextSurf, TextRect)
        button("Training 1", 200, 430, 140, 53, green, bright_green, game_loop)
        button("Training 2", 431, 430, 140, 53, green, bright_green, game_loop)
        button("Training 3", 662, 430, 140, 53, green, bright_green, game_loop)
        button("Human vs AI", 315.5, 550, 140, 53, green, bright_green, game_loop)
        button("Quit", 546.5, 550, 140, 53, red, bright_red, quit_game)

        pygame.display.update()
        clock.tick(15)

def button(msg,x,y,w,h,ic,ac,action=None):
    mouse = pygame.mouse.get_pos()
    click = pygame.mouse.get_pressed() #collects mouse left, right and middle button
    if x + w > mouse[0] > x and y + h > mouse[1] > y:
        pygame.draw.rect(gameDisplay, ac, (x, y, w, h))
        if click[0] == 1 and action!= None:
            action()
    else:
        pygame.draw.rect(gameDisplay, ic, (x, y, w, h))

    smallText = pygame.font.Font("freesansbold.ttf", 20)
    textSurf, textRect = text_objects(msg, smallText)
    textRect.center = ((x + (w / 2)), (y + (h / 2)))
    gameDisplay.blit(textSurf, textRect)

def unpause():
    global pause
    pause = False

def paused():
    largeText = pygame.font.Font('freesansbold.ttf', 110)
    TextSurf, TextRect = text_objects("Paused", largeText)  # Returns text surface and rectangle
    TextRect.center = ((display_width / 2), (display_height / 2.5))
    gameDisplay.blit(TextSurf, TextRect)

    while pause:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        button("Continue", 315.5, 450, 140, 53, green, bright_green, unpause)
        button("Quit", 546.5, 450, 140, 53, red, bright_red, quit_game)

        pygame.display.update()
        clock.tick(15)

def text_objects(text, font):
    textSurface = font.render(text, True, black)
    return textSurface, textSurface.get_rect()

def quit_game():
    pygame.quit()
    quit()

def game_over():
    largeText = pygame.font.Font('freesansbold.ttf', 110)
    TextSurf, TextRect = text_objects("Game Over", largeText)  # Returns text surface and rectangle
    TextRect.center = ((display_width / 2), (display_height / 2.5))
    gameDisplay.blit(TextSurf, TextRect)

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        button("Play Again", 280.5, 450, 140, 53, green, bright_green,game_loop)
        button("Quit", 581.5, 450, 140, 53, red, bright_red,quit_game)

        pygame.display.update()
        clock.tick(15)

def game_loop():
    global pause
    x = (display_width * 0.08)
    y = (display_height * 0.2)

    x_change = 0
    y_change = 0

    blob_speed = 2

    velocity = [2, 2]

    score = 0
    lives = 3

    pos_x = display_width/1.2
    pos_y = display_height/1.2

    previous_time = pygame.time.get_ticks()
    previous_time2 = pygame.time.get_ticks()

    gameExit = False
    while not gameExit:
        for event in pygame.event.get():#monitors hardware movement/ clicks
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        pos_x += velocity[0]
        pos_y += velocity[1]

        if pos_x + blob_width > display_width or pos_x < 601:
            velocity[0] = -velocity[0]

        if pos_y + blob_height > display_height or pos_y < 0:
            velocity[1] = -velocity[1]

        for b in range(len(bullets2)):
            bullets2[b][0] -= 6

        for bullet in bullets2:
            if bullet[0] < 0:
                bullets2.remove(bullet)


        current_time2 = pygame.time.get_ticks()
        #ready to fire when 500 ms have passed.
        if current_time2 - previous_time2 > 500:
            previous_time2 = current_time2
            bullets2.append([pos_x+25, pos_y+24])

# Checks to see if any keys are held down and remembers them with the variable keys.
        keys = pygame.key.get_pressed()

        for b in range(len(bullets)):
            bullets[b][0] += 6

        for bullet in bullets:
            if bullet[0] > 1005:
                bullets.remove(bullet)

        if keys[pygame.K_SPACE]:
            current_time = pygame.time.get_ticks()
            #ready to fire when 500 ms have passed.
            if current_time - previous_time > 500:
                previous_time = current_time
                bullets.append([x+25, y+24])

# If the player is holding down one key or the other the blob moves in that direction
        if x < 0:
            x = 0
        if keys[pygame.K_a]:
            x_change = -blob_speed
        if x > 401 - blob_width:
            x = 401 - blob_width
        if keys[pygame.K_d]:
            x_change = blob_speed
        if keys[pygame.K_p]:
            pause = True
            paused()


# If the player is holding down both or neither of the keys the blob stops
        if keys[pygame.K_a] and keys[pygame.K_d]:
            x_change = 0
        if not keys[pygame.K_a] and not keys[pygame.K_d]:
            x_change = 0

        if y < 0:
            y = 0
        if keys[pygame.K_w]:
            y_change = -blob_speed
        if y > display_height - blob_height:
            y = display_height - blob_height
        if keys[pygame.K_s]:
            y_change = blob_speed


        if keys[pygame.K_w] and keys[pygame.K_s]:
            y_change = 0
        if not keys[pygame.K_w] and not keys[pygame.K_s]:
            y_change = 0


        #print(event)
        # Reset x and y to new position
        x += x_change
        y += y_change

        gameDisplay.fill(blue)  #changes background surface
        bullets_hit(score)
        player_lives(lives)
        pygame.draw.line(gameDisplay, black, (601, display_height), (601, 0), 3)
        pygame.draw.line(gameDisplay, black, (401, display_height), (401, 0), 3)
        blob(pos_x, pos_y)
        blob(x, y)

        for bullet in bullets:
            gameDisplay.blit(bulletpicture, pygame.Rect(bullet[0], bullet[1], 0, 0))
            if bullet[0] > pos_x and bullet[0] < pos_x + blob_width:
                if bullet[1] > pos_y and bullet[1] < pos_y + blob_height or bullet[1] + bullet_height > pos_y and bullet[1] + bullet_height < pos_y + blob_height:
                    bullets.remove(bullet)
                    score+=1

        for bullet in bullets2:
            gameDisplay.blit(bulletpicture, pygame.Rect(bullet[0], bullet[1], 0, 0))
            if bullet[0] + bullet_width < x + blob_width and bullet[0] > x:
                if bullet[1] > y and bullet[1] < y + blob_height or bullet[1] + bullet_height > y and bullet[1] + bullet_height < y + blob_height:
                    bullets2.remove(bullet)
                    lives-=1

        if lives == 0:
            game_over()


        pygame.display.update() #update screen
        clock.tick(120)#moves frame on (fps in parameters)

game_intro()
game_loop()
pygame.quit()
quit()

[Additional Information] I will be using a reinforcement learning algorithm from https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/reinforcement_learning.py which from my understanding requires a gym environment in order for gym.make() to work. Any help would greatly be appreciated, if anything more is needed please tell me.

Solution

I have no experience with the pygame library and no knowledge of its internal workings, that may have some influence on what code needs to run where, so I'm not 100% sure on all of that. But, it's good to just start with some intuitive understanding of roughly what should be happening where:

__init__() should run any one-time setup. I can imagine something like pygame.init() may have to go in here, but this I'm not 100% sure on because I'm not familiar with pygame.
step() should be called whenever an agent selects an action, and then run a single ''frame'' of the game, move it forwards given the action selected by the agent. Alternatively, if you have a game where a single action takes multiple frames, you should run multiple frames here. Essentially: keep the game moving forwards until you hit a point where the agent should get to choose a new action again, then return the current game state.
reset() should... well, reset the game. So, revert back to the (or a random, whatever you want) initial game state, run any cleanup that may be required. I could, for example, also imagine pygame.init() belonging in here. It depends on what exactly that function does. If it only needs to be run once, it belongs in __init__(). If it needs to run at the start of every new game/"episode", ir belongs in reset().
render() should probably contain most of your graphics related code. You can try to take inspiration from, for example, the cartpole environment in gym, which also draws some rather simple graphics here. It looks like it should draw exactly one frame.

Now, looking at the code you're starting from, there seems to be a signifant amount of User Interface code... all kinds of code related to buttons, pausing/unpausing, a fancy (animated?) intro at the start of the game. I don't know if you can afford to get rid of all this? If you're doing purely Reinforcement Learning, you probably can. If you still need user interaction, you probably can't, and then things become a whole lot more difficult since all these things do not nicely fit the gym framework.

I can try to make a few educated guesses of a few of the remaining parts of the code and where it should go, but you should carefully inspect everything anyway based on the more general guidelines above:

def reset(self):
    # all the following code seems to define the initial game state
    x = (display_width * 0.08)
    y = (display_height * 0.2)

    x_change = 0
    y_change = 0

    blob_speed = 2

    velocity = [2, 2]

    score = 0
    lives = 3

    pos_x = display_width/1.2
    pos_y = display_height/1.2

    bullets=[]
    bullets2=[]

step() should probably contain most of the code in that while loop in game_loop(). The key-input-checking code should be modified though, that should instead start making use of the action passed into step(). Finally, step() is expected to return a tuple containing:

next game state, something the agent can use to decide what the next action will be. This can be raw pixels if you like, or something that's easier to learn from (like a bunch of features telling you where things are).
A reward, some indication of how good your agen't action was.
True if the game is over now, False otherwise.
Some dictionary with whatever extra info you like, for debugging purposes. Can simply be an empty dict {}.