diff --git a/simonet/FlappyAgent.py b/simonet/FlappyAgent.py
new file mode 100644
index 0000000..9742510
--- /dev/null
+++ b/simonet/FlappyAgent.py
@@ -0,0 +1,20 @@
+from state import new_state
+import numpy as np
+import pickle
+
+# Lazily-loaded Q-table: shape matches the discretized state space
+# (18 x-bins, 30 y-bins, 21 velocity-bins, 2 actions).
+Q = None
+
+def FlappyPolicy(state, screen):
+    global Q
+
+    # Load the trained Q-table once, on the first call.
+    if Q is None:
+        with open("Qtrained", 'rb') as f:
+            Q = pickle.load(f)
+
+    s = new_state(state)
+    action = np.argmax(Q[s[0], s[1], s[2]])
+    # 119 is the PLE "flap" key; 0 means "do nothing".
+    return int(action) * 119
diff --git a/simonet/Qtrained b/simonet/Qtrained
new file mode 100644
index 0000000..4c90c15
Binary files /dev/null and b/simonet/Qtrained differ
diff --git a/simonet/__pycache__/FlappyAgent.cpython-36.pyc b/simonet/__pycache__/FlappyAgent.cpython-36.pyc
new file mode 100644
index 0000000..c65d0bf
Binary files /dev/null and b/simonet/__pycache__/FlappyAgent.cpython-36.pyc differ
diff --git a/simonet/__pycache__/state.cpython-36.pyc b/simonet/__pycache__/state.cpython-36.pyc
new file mode 100644
index 0000000..6c3a682
Binary files /dev/null and b/simonet/__pycache__/state.cpython-36.pyc differ
diff --git a/simonet/run.py b/simonet/run.py
new file mode 100644
index 0000000..cece890
--- /dev/null
+++ b/simonet/run.py
@@ -0,0 +1,29 @@
+# You're not allowed to change this file
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+from FlappyAgent import FlappyPolicy
+
+game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
+# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.
+
+p.init()
+reward = 0.0
+
+nb_games = 100
+cumulated = np.zeros((nb_games))
+
+for i in range(nb_games):
+    p.reset_game()
+
+    while(not p.game_over()):
+        state = game.getGameState()
+        screen = p.getScreenRGB()
+        action = FlappyPolicy(state, screen) ### Your job is to define this function.
+
+        reward = p.act(action)
+        cumulated[i] = cumulated[i] + reward
+
+average_score = np.mean(cumulated)
+max_score = np.max(cumulated)
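FlappyAgent.py and trainning.py both hard-code the (18, 30, 21, 2) table shape that state.py's discretization feeds. A quick standalone check like the sketch below can catch a shape or bounds mismatch before a full 100-game evaluation run; this is a hypothetical helper, not part of the submission, and the sample state values are made up for illustration:

import pickle
from state import new_state

# Verify the pickled table has the shape FlappyAgent.py assumes.
with open("Qtrained", 'rb') as f:
    Q = pickle.load(f)
assert Q.shape == (18, 30, 21, 2)

# Illustrative raw state (keys match game.getGameState(); values are made up).
sample = {'next_pipe_dist_to_player': 283.0,
          'player_y': 256.0,
          'next_pipe_bottom_y': 192.0,
          'player_vel': -8.0}
x, y, v = new_state(sample)
assert 0 <= x < 18 and 0 <= y < 30 and 0 <= v < 21
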
diff --git a/simonet/state.py b/simonet/state.py
new file mode 100644
index 0000000..5f3536e
--- /dev/null
+++ b/simonet/state.py
@@ -0,0 +1,10 @@
+def new_state(state):
+    # Discretize the continuous game state into Q-table indices:
+    # x: horizontal distance to the next pipe, in 20-pixel bins
+    # y: vertical offset from the bottom of the next pipe, shifted to stay >= 0
+    # v: vertical velocity, shifted to stay >= 0
+    x = int(round(state['next_pipe_dist_to_player'] / 20))
+    y = int(round((state['player_y'] - state['next_pipe_bottom_y']) / 20)) + 15
+    v = int(state['player_vel']) + 10
+
+    return [x, y, v]
diff --git a/simonet/trainning.py b/simonet/trainning.py
new file mode 100644
index 0000000..881b153
--- /dev/null
+++ b/simonet/trainning.py
@@ -0,0 +1,118 @@
+import numpy as np
+import pickle
+from ple import PLE
+from ple.games.flappybird import FlappyBird
+from state import new_state
+
+# Map the argmax index (0 or 1) to the PLE action (119 = flap, 0 = do nothing)
+def get_action(a):
+    return a * 119
+
+# Epsilon-greedy-style action selection: with small probability the greedy
+# action is replaced by a height heuristic (flap when the bird is close to
+# the bottom pipe). s is the discretized state, state the raw state dict.
+def epsilon_greedy(Q, s, epsilon, state):
+    a = np.argmax(Q[s[0], s[1], s[2]])
+    if np.random.rand() <= epsilon:
+        if np.random.rand() <= 0.5 * epsilon:
+            if state['next_pipe_bottom_y'] - state['player_y'] < 50:
+                a = 1
+            else:
+                a = 0
+    return a
+
+
+# Parameters
+gamma = 0.95
+alpha = 0.9
+epsilon = 0.1
+nb_games = 15000
+
+# Size of the discretized state space
+X = 18
+Y = 30
+V = 21
+
+# Initialize Q (uncomment the lines below to resume from a saved table)
+Q = np.zeros((X, Y, V, 2))
+#with open("Qtrained", 'rb') as f:
+#    Q = pickle.load(f)
+#alpha = 0.1
+
+# Create the game, accelerated and headless for training
+game = FlappyBird(graphics="fancy")
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
+
+# Running scores over the last 100 / 1000 games
+last_100 = 0
+last_1000 = 0
+
+# Measuring the bounds of the state space (used once to size the table)
+#Xmax = 0
+#Ymax = 0
+#Vmax = 0
+
+for g in range(1, nb_games + 1):
+
+    # Start a new game
+    p.init()
+    p.reset_game()
+    state = game.getGameState()
+    reward = training_reward = 0
+    s = new_state(state)
+    action = epsilon_greedy(Q, s, epsilon, state)
+
+    # Measuring the bounds of the state space
+    #Xmax = max(Xmax, s[0])
+    #Ymax = max(Ymax, s[1])
+    #Vmax = max(Vmax, s[2])
+
+    while not p.game_over():
+
+        # Act
+        reward = p.act(get_action(action))
+
+        # Shape the training reward: heavy penalty on death, +1 otherwise
+        if reward == -5:
+            training_reward = -100
+        else:
+            training_reward = 1
+
+        # Next state and next action (on-policy)
+        state_ = game.getGameState()
+        s_ = new_state(state_)
+        action_ = epsilon_greedy(Q, s_, epsilon, state_)
+
+        # SARSA update
+        delta = training_reward + gamma * Q[s_[0], s_[1], s_[2]][action_] - Q[s[0], s[1], s[2]][action]
+        Q[s[0], s[1], s[2]][action] += alpha * delta
+
+        # Advance to the next step
+        s = s_
+        action = action_
+
+        # Accumulate the raw score, ignoring the -5 death penalty
+        if reward != -5:
+            last_100 += reward
+            last_1000 += reward
+
+    # Monitor progress during training and decay alpha down to its 0.1 floor
+    if g % 100 == 0:
+        print('Average over the last 100 games: %.2f' % (last_100 / 100))
+        last_100 = 0
+    if g % 1000 == 0:
+        while alpha > 0.1:
+            alpha /= 1.01
+        print('Average over the last 1000 games: %.2f' % (last_1000 / 1000))
+        # Stop early once the 1000-game average exceeds 50
+        if last_1000 / 1000 > 50:
+            break
+        last_1000 = 0
+
+
+# Size of the reachable state space
+#print(Xmax, Ymax, Vmax)
+
+# Save the Q-table with pickle (marshal did not work)
+with open('Qtrained', 'wb') as f:
+    pickle.dump(Q, f)
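The inner loop of trainning.py is the standard tabular SARSA rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a)), with the next action a' chosen on-policy by the same epsilon-greedy function. A minimal self-contained sketch of a single update step on a fresh table, using made-up numbers:

import numpy as np

# One SARSA step mirroring the update in trainning.py; all values here are
# illustrative, not taken from an actual run.
alpha, gamma = 0.9, 0.95
Q = np.zeros((18, 30, 21, 2))

s, a = (14, 18, 2), 0     # current discretized state and action
s_, a_ = (13, 17, 3), 0   # next state and on-policy next action
r = 1                     # shaped training reward (+1 per surviving frame)

delta = r + gamma * Q[s_][a_] - Q[s][a]
Q[s][a] += alpha * delta
print(Q[s][a])            # 0.9, i.e. 0.9 * (1 + 0.95*0 - 0)

With alpha this large, early estimates move aggressively toward each new target, which is why the training script later decays alpha toward its 0.1 floor.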