diff --git a/distributed_q_learning/plot/plot.py b/distributed_q_learning/plot/plot.py
index 58ba309..5f66dcf 100644
--- a/distributed_q_learning/plot/plot.py
+++ b/distributed_q_learning/plot/plot.py
@@ -26,20 +26,11 @@ def running_avg(x, ws):
 
 
 if __name__=='__main__':
 
-    resdir = '../experiments/reproduce_deterministic'
+    resdir = '../experiments/reproduce_deterministic/'
     parq_list = glob.glob(join(resdir, '*/tr_rewards.parquet'))
     config_list = glob.glob(join(resdir, '*/*_config.csv'))
-
-    config_list = [
-        '../experiments/reproduce_deterministic/1/1_config.csv',
-        '../experiments/reproduce_deterministic/2/2_config.csv',
-        '../experiments/reproduce_deterministic/3/3_config.csv',
-        '../experiments/reproduce_deterministic/4/4_config.csv',
-        '../experiments/reproduce_deterministic/5/5_config.csv',
-    ]
-
     ws = 5000
 
     plt.figure(figsize=(15,7), dpi=80)
 
@@ -68,4 +59,4 @@ def running_avg(x, ws):
     plt.xlabel('Episodes')
     plt.ylabel('Cumulative reward')
     plt.tight_layout()
-    plt.savefig('training.png')
\ No newline at end of file
+    plt.savefig('training.png')
diff --git a/distributed_q_learning/train.py b/distributed_q_learning/train.py
index 51f2c00..d9da105 100644
--- a/distributed_q_learning/train.py
+++ b/distributed_q_learning/train.py
@@ -207,7 +207,7 @@ def eval_batch(
 ], columns=['epsilon', 'epsilon decay', 'alpha', 'alpha decay', 'n_episodes'])
 
 # Other parameters
-out_dir = 'experiments/reproduce_determinstic'
+out_dir = 'experiments/reproduce_deterministic'
 n_workers = multiprocessing.cpu_count()
 master_seed = 666
 log_every = 10_000
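
Note on the plot.py hunk: the deleted hardcoded config_list was implicitly ordered by run number (1 through 5), while glob.glob() returns paths in arbitrary filesystem order, so runs may now be processed in a different order than before. If numeric run order matters to the plot, sorting the glob results restores it; a minimal sketch, assuming the <run>/<run>_config.csv layout visible in the diff:

    import glob
    import os
    from os.path import join

    resdir = '../experiments/reproduce_deterministic/'

    # Sort config paths by their numeric run directory,
    # e.g. '.../3/3_config.csv' -> 3, so runs come out in order 1..5.
    config_list = sorted(
        glob.glob(join(resdir, '*/*_config.csv')),
        key=lambda p: int(os.path.basename(os.path.dirname(p))),
    )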