Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a366a88
Deterministic policies
MedericFourmy Jan 24, 2018
cfd2711
Initial training algorithm: Simple Q-Learning model, does not converg…
MedericFourmy Jan 30, 2018
585aee5
Neural Q learning implemented
MedericFourmy Feb 5, 2018
d79c6d0
Some success with SARSA algorithm but needs to be tuned
MedericFourmy Feb 6, 2018
24fb8bf
Plain SARSA more or less working
MedericFourmy Feb 7, 2018
594af91
Average of 21 using LAMBDA SARSA
MedericFourmy Feb 9, 2018
38def90
Faster Eps diminution + timing: 12.5
MedericFourmy Feb 15, 2018
4812702
2000000 iteration + shorter time constant for eps decreasing: mean of…
MedericFourmy Feb 16, 2018
63bdc35
After different things tried unsuccessfully, achieved a 45 mean with …
MedericFourmy Feb 20, 2018
fd1414d
Lambda Sarsa --> Avg nb of doors: 70, in avg less than 1 state not se…
MedericFourmy Feb 20, 2018
2b62274
Trying scaler on Q-learning NN
Mar 8, 2018
3e7e408
Added a DISPLAY PARAM
Mar 8, 2018
4a92f62
No window + no scaler
Mar 10, 2018
638f55e
solved nan weights pbe
Mar 10, 2018
ea84312
Play and test as same function
Mar 11, 2018
bac55cb
More refactoring
Mar 11, 2018
6ca19d1
different files
Mar 11, 2018
a5b9146
Files renamed
Mar 11, 2018
aae26e6
Merge pull request #1 from SupaeroDataScience/master
Mar 11, 2018
6c3aa02
Beginning of DQL
Mar 11, 2018
01dad4e
Merge branch 'master' of https://github.com/MedericFourmy/RLchallenge
Mar 11, 2018
6a5db7d
DQL implemented
Mar 11, 2018
f80b4c2
Fixed dumb save every time bug
Mar 11, 2018
6480bdc
Working LambdaSarsa
Mar 11, 2018
4a9ac30
update epsilon bug solved + doom pbe
Mar 11, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions Fourmy/FlappyAgent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from algorithms import FeaturesLambdaSarsa

# Index into POLICIES selecting the active policy; -1 picks the last
# entry of the list (the trained lambda_sarsa policy).
POLICY = -1

# Action decided on the previous call of policy_deter2 (one-frame delay).
PREV = None
# Cursor into STRAIGHT for straight_ahead_no_chaser; starts at 1, so the
# first cycle skips index 0 — presumably intentional, TODO confirm.
INDEX = 1
# Fixed 6-frame action loop: four no-ops followed by two flaps (key 119).
STRAIGHT = [None, None, None, None, 119, 119]
# Discrete action set used by the learned agent: 0 -> no-op, 1 -> flap.
ACTIONS = [None, 119]


def policy_deter1(state, screen):
    """Deterministic policy: flap (119) when the bird is at or below a
    point 50 px above the next pipe's bottom edge, otherwise no-op."""
    flap_line = state['next_pipe_bottom_y'] - 50
    if state['player_y'] >= flap_line:
        return 119
    return None


def policy_deter2(state, screen):
    """One-frame-delayed deterministic policy: emit the action decided on
    the PREVIOUS call, then store this frame's decision (flap when the
    bird is within 60 px of the next pipe's bottom) in the global PREV."""
    global PREV
    delayed_action = PREV
    flap_line = state['next_pipe_bottom_y'] - 60
    PREV = 119 if state['player_y'] >= flap_line else None
    print(delayed_action)

    return delayed_action


def straight_ahead_no_chaser(state, screen):
    """Open-loop policy: ignore the pipes and replay the fixed STRAIGHT
    action sequence in a loop, advancing the global INDEX each frame.

    Corresponding velocity profile: [1.0, 2.0, 3.0, 4.0, -8.0, 0.0]
    """
    global INDEX
    print(state['player_y'])
    chosen = STRAIGHT[INDEX]
    # Wrap the cursor around the end of the sequence.
    INDEX = (INDEX + 1) % len(STRAIGHT)
    return chosen


def always_up(state, screen):
    """Trivial baseline policy: flap (key code 119) on every frame."""
    return 119


def always_down(state, screen):
    """Trivial baseline policy: never flap; also dumps the raw state dict
    to stdout for inspection."""
    print(state)
    return None


# Instantiate and load the pre-trained lambda-SARSA agent once at import
# time, so each call to lambda_sarsa() below is only a Q-value lookup.
agent_lambda_sarsa = FeaturesLambdaSarsa()
agent_lambda_sarsa.load()


def lambda_sarsa(state, screen):
    """Learned policy: pick the greedy action (epsilon = 0) from the
    pre-trained lambda-SARSA agent's Q-values and map it to a key code."""
    q_values = agent_lambda_sarsa.get_qvals(state)
    best = agent_lambda_sarsa.greedy_action(q_values, 0)
    return ACTIONS[best]


# Registry of all available policies; the module-level POLICY index
# selects which one FlappyPolicy dispatches to (POLICY = -1 -> lambda_sarsa).
POLICIES = [
    policy_deter1,
    policy_deter2,
    straight_ahead_no_chaser,
    always_up,
    always_down,
    lambda_sarsa
]


def FlappyPolicy(state, screen):
    """Entry point of the challenge: dispatch to the policy selected by
    the module-level POLICY index and return its chosen action."""
    active_policy = POLICIES[POLICY]
    return active_policy(state, screen)
Loading