Commit a50c041

init v0.2.0

2 parents e864a08 + 2b798c0

15 files changed (+74, -61 lines)

README.md

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
 [![Embark](https://img.shields.io/badge/discord-OpenRL-%237289da.svg?logo=discord)](https://discord.gg/qMbVT2qBhr)
 [![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://join.slack.com/t/openrlhq/shared_invite/zt-1tqwpvthd-Eeh0IxQ~DIaGqYXoW2IUQg)
 
-OpenRL-v0.1.10 is updated on Oct 27, 2023
+OpenRL-v0.2.0 is updated on Dec 20, 2023
 
 The main branch is the latest version of OpenRL, which is under active development. If you just want to have a try with
 OpenRL, you can switch to the stable branch.

examples/envpool/envpool_wrappers.py

Lines changed: 4 additions & 5 deletions
@@ -9,8 +9,7 @@
 from packaging import version
 from stable_baselines3.common.vec_env import VecEnvWrapper as BaseWrapper
 from stable_baselines3.common.vec_env import VecMonitor
-from stable_baselines3.common.vec_env.base_vec_env import (VecEnvObs,
-                                                            VecEnvStepReturn)
+from stable_baselines3.common.vec_env.base_vec_env import VecEnvObs, VecEnvStepReturn
 
 is_legacy_gym = version.parse(gym.__version__) < version.parse("0.26.0")
 
@@ -114,9 +113,9 @@ def __init__(
 
         if is_wrapped_with_monitor:
             warnings.warn(
-                "The environment is already wrapped with a `Monitor` wrapper"
-                "but you are wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics will be"
-                "overwritten by the `VecMonitor` ones.",
+                "The environment is already wrapped with a `Monitor` wrapperbut you are"
+                " wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics"
+                " will beoverwritten by the `VecMonitor` ones.",
                 UserWarning,
             )
 
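For context, a minimal usage sketch of the wrappers defined in this file (an illustration, not code from this commit; the task id, env count, and the exact behavior of VecAdapter are assumptions):

import envpool

from examples.envpool.envpool_wrappers import VecAdapter, VecMonitor

# Hypothetical sketch: build a batch of envpool environments, adapt them to the
# stable-baselines3 VecEnv interface, then record episode statistics.
envs = envpool.make("Pong-v5", env_type="gym", num_envs=8)  # task id is illustrative
envs = VecAdapter(envs)  # assumed: adapts envpool's batched API to the VecEnv interface
envs = VecMonitor(envs)  # wrapping an env that already has a `Monitor` triggers the warning above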

examples/envpool/make_env.py

Lines changed: 7 additions & 4 deletions
@@ -5,9 +5,12 @@
 import envpool
 from gymnasium import Env
 
-
-from openrl.envs.vec_env import (AsyncVectorEnv, RewardWrapper,
-                                 SyncVectorEnv, VecMonitorWrapper)
+from openrl.envs.vec_env import (
+    AsyncVectorEnv,
+    RewardWrapper,
+    SyncVectorEnv,
+    VecMonitorWrapper,
+)
 from openrl.envs.vec_env.vec_info import VecInfoFactory
 from openrl.envs.wrappers.base_wrapper import BaseWrapper
 from openrl.rewards import RewardFactory
@@ -76,7 +79,7 @@ def make_envpool_envs(
     assert kwargs.get("env_type") in ["gym", "dm", "gymnasium"]
     kwargs["envpool"] = True
 
-    if 'env_wrappers' in kwargs:
+    if "env_wrappers" in kwargs:
         env_wrappers = kwargs.pop("env_wrappers")
     else:
         env_wrappers = []

examples/envpool/train_ppo.py

Lines changed: 2 additions & 2 deletions
@@ -16,10 +16,10 @@
 
 """"""
 import numpy as np
-
-from openrl.configs.config import create_config_parser
 from make_env import make
+
 from examples.envpool.envpool_wrappers import VecAdapter, VecMonitor
+from openrl.configs.config import create_config_parser
 from openrl.modules.common import PPONet as Net
 from openrl.modules.common.ppo_net import PPONet as Net
 from openrl.runners.common import PPOAgent as Agent
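For orientation, a minimal training sketch in the usual OpenRL style showing how these imports fit together (assumptions, not code from this commit: the environment id, the signature of the local make, and the step budget are illustrative):

# Hypothetical sketch of how the imports above are typically combined.
cfg_parser = create_config_parser()
cfg = cfg_parser.parse_args([])

env = make("PongNoFrameskip-v4", env_num=8)  # signature of make_env.make is assumed
env = VecMonitor(VecAdapter(env))            # wrapper usage is assumed

net = Net(env, cfg=cfg)   # PPONet
agent = Agent(net)        # PPOAgent
agent.train(total_time_steps=100_000)
env.close()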

openrl/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 __TITLE__ = "openrl"
-__VERSION__ = "v0.1.10"
+__VERSION__ = "v0.2.0"
 __DESCRIPTION__ = "Distributed Deep RL Framework"
 __AUTHOR__ = "OpenRL Contributors"
 __EMAIL__ = "huangshiyu@4paradigm.com"

openrl/envs/common/build_envs.py

Lines changed: 1 addition & 1 deletion
@@ -69,4 +69,4 @@ def _make_env() -> Env:
         return _make_env
 
     env_fns = [create_env(env_id, env_num, need_env_id) for env_id in range(env_num)]
-    return env_fns
+    return env_fns

openrl/envs/common/registration.py

Lines changed: 1 addition & 1 deletion
@@ -173,4 +173,4 @@ def make(
         vec_info_class = VecInfoFactory.get_vec_info_class(vec_info_class, env)
         env = VecMonitorWrapper(vec_info_class, env)
 
-    return env
+    return env

openrl/envs/nlp/daily_dialog_env.py

Lines changed: 12 additions & 11 deletions
@@ -72,16 +72,18 @@ def __init__(
         # set the observation and action space here
         self._vocab_size = self.tokenizer.vocab_size
 
-        self.observation_space = DictSpace({
-            "input_encoded_pt": spaces.Box(
-                low=0,
-                high=self._vocab_size,
-                shape=(self._max_text_length + self.max_steps,),
-            ),
-            "input_attention_mask_pt": spaces.Box(
-                low=0, high=1, shape=(self._max_text_length + self.max_steps,)
-            ),
-        })
+        self.observation_space = DictSpace(
+            {
+                "input_encoded_pt": spaces.Box(
+                    low=0,
+                    high=self._vocab_size,
+                    shape=(self._max_text_length + self.max_steps,),
+                ),
+                "input_attention_mask_pt": spaces.Box(
+                    low=0, high=1, shape=(self._max_text_length + self.max_steps,)
+                ),
+            }
+        )
         self.action_space = Discrete(n=self._vocab_size)
         # see https://github.com/huggingface/transformers/issues/4875 : rounding up to nearest power of 2 for better GPU efficiency
 
@@ -112,7 +114,6 @@ def __init__(
         self.reward_function = None
 
     def set_reward(self, reward_fn=None):
-
         self.reward_function = reward_fn
 
     def step_word(self, word: str) -> Tuple[Dict[str, torch.tensor], int, bool, dict]:
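For readers less familiar with the space definition being reformatted above, an equivalent plain-gymnasium sketch of the observation and action layout (the vocabulary size and sequence length are placeholders, and the real env uses OpenRL's DictSpace rather than gymnasium's Dict):

from gymnasium import spaces

vocab_size = 50257  # placeholder for tokenizer.vocab_size
max_len = 512       # placeholder for _max_text_length + max_steps

# Observation: encoded token ids plus an attention mask of the same length;
# action: choose the next token id.
observation_space = spaces.Dict(
    {
        "input_encoded_pt": spaces.Box(low=0, high=vocab_size, shape=(max_len,)),
        "input_attention_mask_pt": spaces.Box(low=0, high=1, shape=(max_len,)),
    }
)
action_space = spaces.Discrete(vocab_size)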

openrl/envs/nlp/fake_dialog_env.py

Lines changed: 12 additions & 10 deletions
@@ -30,16 +30,18 @@ def __init__(
         # set the observation and action space here
         self._vocab_size = 2
 
-        self.observation_space = DictSpace({
-            "input_encoded_pt": spaces.Box(
-                low=0,
-                high=self._vocab_size,
-                shape=(self._max_text_length + self.max_steps,),
-            ),
-            "input_attention_mask_pt": spaces.Box(
-                low=0, high=1, shape=(self._max_text_length + self.max_steps,)
-            ),
-        })
+        self.observation_space = DictSpace(
+            {
+                "input_encoded_pt": spaces.Box(
+                    low=0,
+                    high=self._vocab_size,
+                    shape=(self._max_text_length + self.max_steps,),
+                ),
+                "input_attention_mask_pt": spaces.Box(
+                    low=0, high=1, shape=(self._max_text_length + self.max_steps,)
+                ),
+            }
+        )
         self.action_space = Discrete(n=self._vocab_size)
 
         n = 2

openrl/envs/nlp/rewards/intent.py

Lines changed: 2 additions & 3 deletions
@@ -41,10 +41,9 @@ def __init__(
         self.use_model_parallel = False
 
         if intent_model == "builtin_intent":
-
             self._device = "cpu"
-            self.use_data_parallel = False
-
+            self.use_data_parallel = False
+
             from transformers import GPT2Config, GPT2LMHeadModel
 
             class TestTokenizer: