import numpy as np
import pandas as pd
import tensorflow as tf

# Silence TensorFlow logging and let GPU memory grow on demand
tf.get_logger().setLevel('ERROR')
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

from keras import layers, models

# FinRock: data feeding, trading environment, scaling, reward and metrics
from finrock.data_feeder import PdDataFeeder
from finrock.trading_env import TradingEnv
from finrock.scalers import MinMaxScaler
from finrock.reward import simpleReward
from finrock.metrics import DifferentActions, AccountValue

# RockRL: training utilities and the TensorFlow PPO agent
from rockrl.utils.misc import MeanAverage
from rockrl.utils.memory import Memory
from rockrl.tensorflow import PPOAgent

# Load the synthetic sinusoid dataset and hold out the last 1000 rows for testing
df = pd.read_csv('Datasets/random_sinusoid.csv')
df = df[:-1000]  # leave 1000 for testing

pd_data_feeder = PdDataFeeder(df)


# Trading environment: 50-step observation window, min-max scaled observations,
# a simple reward function and two tracked metrics
env = TradingEnv(
    data_feeder=pd_data_feeder,
    output_transformer=MinMaxScaler(min=pd_data_feeder.min, max=pd_data_feeder.max),
    initial_balance=1000.0,
    max_episode_steps=1000,
    window_size=50,
    reward_function=simpleReward,
    metrics=[
        DifferentActions(),
        AccountValue(),
    ]
)

action_space = env.action_space
input_shape = env.observation_space.shape


# Actor: maps the observation window to a probability distribution over actions
actor_model = models.Sequential([
    layers.Input(shape=input_shape, dtype=tf.float32),
    layers.Flatten(),
    layers.Dense(512, activation='elu'),
    layers.Dense(256, activation='elu'),
    layers.Dense(64, activation='elu'),
    layers.Dropout(0.5),
    layers.Dense(action_space, activation='softmax')
])

# Critic: estimates the state value with a single linear output
critic_model = models.Sequential([
    layers.Input(shape=input_shape, dtype=tf.float32),
    layers.Flatten(),
    layers.Dense(512, activation='elu'),
    layers.Dense(256, activation='elu'),
    layers.Dense(64, activation='elu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation=None)
])

agent = PPOAgent(
    actor=actor_model,
    critic=critic_model,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
    batch_size=512,
    lamda=0.95,  # GAE lambda
    kl_coeff=0.5,
    c2=0.01,
    writer_comment='ppo_sinusoid',
)


# Training loop: collect one episode, update the agent, track the running mean reward
memory = Memory()
meanAverage = MeanAverage(best_mean_score_episode=1000)
state, info = env.reset()
rewards = 0.0
while True:
    action, prob = agent.act(state)

    next_state, reward, terminated, truncated, info = env.step(action)
    memory.append(state, action, reward, prob, terminated, truncated, next_state, info)
    state = next_state

    if memory.done:
        history = agent.train(memory)
        mean_reward = meanAverage(np.sum(memory.rewards))

        # Save the models whenever the running mean reward reaches a new best
        if meanAverage.is_best(agent.epoch):
            agent.save_models('ppo_sinusoid')

        # Decay the learning rate when the policy update drifts too far (large KL divergence)
        if history['kl_div'] > 0.05:
            agent.reduce_learning_rate(0.99, verbose=False)

        print(agent.epoch, np.sum(memory.rewards), mean_reward, info["metrics"]['account_value'], history['kl_div'])
        agent.log_to_writer(info['metrics'])
        memory.reset()
        state, info = env.reset()

    if agent.epoch >= 10000:
        break
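

# Evaluation sketch: a minimal illustration of how the 1000 rows held out above could be
# replayed with the trained agent. It only reuses calls already present in this script
# (PdDataFeeder, TradingEnv, agent.act, env.step); it assumes the environment and agent
# behave the same at inference time, and it evaluates the in-memory agent rather than
# reloading saved models.
test_df = pd.read_csv('Datasets/random_sinusoid.csv')[-1000:]
test_data_feeder = PdDataFeeder(test_df)
test_env = TradingEnv(
    data_feeder=test_data_feeder,
    output_transformer=MinMaxScaler(min=test_data_feeder.min, max=test_data_feeder.max),
    initial_balance=1000.0,
    max_episode_steps=1000,
    window_size=50,
    reward_function=simpleReward,
    metrics=[DifferentActions(), AccountValue()],
)

state, info = test_env.reset()
done = False
while not done:
    action, _ = agent.act(state)  # assumed usable for plain inference as well
    state, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated
print('test account value:', info['metrics']['account_value'])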