6 changes: 1 addition & 5 deletions .circleci/config.yml
@@ -65,11 +65,7 @@ commands:
           name: install dependencies
           # MUJOCO_KEY is defined in a CircleCI context
           # Do some sanity checks to make sure key works
-          command: |
-            curl -o /root/.mujoco/mjkey.txt ${MUJOCO_KEY}
-            md5sum /root/.mujoco/mjkey.txt
-            [[ -d /venv ]] || /evaluating-rewards/scripts/build_venv.sh /venv
-            python -c "import mujoco_py"
+          command: "[[ -d /venv ]] || /evaluating-rewards/scripts/build_venv.sh /venv"

       - save_cache:
           paths:
13 changes: 5 additions & 8 deletions Dockerfile
@@ -29,6 +29,7 @@ RUN apt-get update -q \
     ffmpeg \
     software-properties-common \
     net-tools \
+    patchelf \
     parallel \
    python3.7 \
    python3.7-dev \
@@ -42,15 +43,13 @@ RUN apt-get update -q \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

-RUN curl -o /usr/local/bin/patchelf https://s3-us-west-2.amazonaws.com/openai-sci-artifacts/manual-builds/patchelf_0.9_amd64.elf \
-    && chmod +x /usr/local/bin/patchelf
-
 ENV LANG C.UTF-8

 RUN mkdir -p /root/.mujoco \
     && curl -o mjpro150.zip https://www.roboti.us/download/mjpro150_linux.zip \
     && unzip mjpro150.zip -d /root/.mujoco \
-    && rm mjpro150.zip
+    && rm mjpro150.zip \
+    && curl -o /root/.mujoco/mjkey.txt https://www.roboti.us/file/mjkey.txt

 # Set the PATH to the venv before we create the venv, so it's visible in base.
 # This is since we may create the venv outside of Docker, e.g. in CI
@@ -69,9 +68,7 @@ WORKDIR /evaluating-rewards
 COPY ./scripts /evaluating-rewards/scripts
 COPY ./requirements.txt /evaluating-rewards
 COPY ./requirements-dev.txt /evaluating-rewards
-
-# mjkey.txt needs to exist for build, but doesn't need to be a real key
-RUN touch /root/.mujoco/mjkey.txt && /evaluating-rewards/scripts/build_venv.sh /venv
+RUN /evaluating-rewards/scripts/build_venv.sh /venv

 # full stage contains everything.
 # Can be used for deployment and local testing.
@@ -81,7 +78,7 @@ FROM python-req as full
 COPY . /evaluating-rewards
 # Build a wheel then install to avoid copying whole directory (pip issue #2195)
 RUN python setup.py sdist bdist_wheel
-RUN pip install dist/evaluating_rewards-*.whl
+RUN pip install --upgrade dist/evaluating_rewards-*.whl

 # Default entrypoints
 CMD ["pytest", "-n", "auto", "-vv", "tests/", "examples/"]
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,8 +1,7 @@
 seals @ git+https://github.com/HumanCompatibleAI/seals.git@a425714
 imitation @ git+https://github.com/HumanCompatibleAI/imitation.git@tf-master
-stable-baselines @ git+https://github.com/hill-a/stable-baselines.git
 sacred @ git+https://github.com/IDSIA/sacred.git@e62bb6
-gym[mujoco]
+gym[mujoco]~=0.21.0
 tabulate
 # Avoid https://github.com/matplotlib/matplotlib/issues/18407
 matplotlib!=3.3.1,!=3.3.0
@@ -17,5 +16,6 @@ seaborn
 setuptools
 scikit-learn
 scipy
+stable-baselines>=2.10.1
 tensorflow>=1.15,<1.16
 xarray
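Note on the new pin: `gym[mujoco]~=0.21.0` is a compatible-release constraint (and stable-baselines moves from a git reference to a plain `>=2.10.1` PyPI pin). A minimal sketch, not part of the diff, showing what the `~=` specifier admits using the `packaging` library that pip itself builds on:

# Illustration only: the version strings below are examples, not releases the repo depends on.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.21.0")   # equivalent to >=0.21.0, ==0.21.*
print(spec.contains("0.21.3"))    # True  - patch releases are allowed
print(spec.contains("0.22.0"))    # False - minor version bumps are excluded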
4 changes: 2 additions & 2 deletions setup.py
@@ -29,7 +29,7 @@ def get_version() -> str:
         0,
         os.path.join(os.path.dirname(__file__), "src", "evaluating_rewards"),
     )
-    from version import (  # type:ignore  # pylint:disable=no-name-in-module,import-outside-toplevel
+    from version import (  # type:ignore  # pylint:disable=import-outside-toplevel,import-error
         VERSION,
     )

@@ -38,7 +38,7 @@ def get_version() -> str:


 def load_requirements(fname):
-    with open(fname) as f:
+    with open(fname, "r", encoding="utf-8") as f:
         return f.read().strip().split("\n")

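The explicit `encoding="utf-8"` (here and in results.py below) makes the read independent of the platform's default locale and avoids pylint's unspecified-encoding warning. A hypothetical call site, just to show how a helper of this shape feeds setuptools (the metadata values are illustrative, not copied from the repo's setup.py):

from setuptools import setup

def load_requirements(fname):
    # Same shape as the helper above; explicit encoding keeps the read locale-independent.
    with open(fname, "r", encoding="utf-8") as f:
        return f.read().strip().split("\n")

setup(
    name="example-project",  # illustrative, not the repo's metadata
    install_requires=load_requirements("requirements.txt"),
)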
4 changes: 2 additions & 2 deletions src/evaluating_rewards/analysis/results.py
@@ -168,10 +168,10 @@ def _find_sacred_parent(
         seen[parent] = path

     config_path = os.path.join(parent, "sacred", "config.json")
-    with open(config_path, "r") as f:
+    with open(config_path, "r", encoding="utf-8") as f:
         config = json.load(f)
     run_path = os.path.join(parent, "sacred", "run.json")
-    with open(run_path, "r") as f:
+    with open(run_path, "r", encoding="utf-8") as f:
         run = json.load(f)

     return config, run, parent
@@ -171,7 +171,7 @@ def evaluate_reward_model(


 def plot_state_density(
-    dataset_generator: datasets.TransitionsCallable, nsamples: int = 2 ** 12, **kwargs
+    dataset_generator: datasets.TransitionsCallable, nsamples: int = 2**12, **kwargs
 ):
     """Plots the density of a state distribution.

2 changes: 1 addition & 1 deletion src/evaluating_rewards/distances/common_config.py
@@ -184,7 +184,7 @@ def _update_common_configs() -> None:
         COMMON_CONFIGS[chk_key] = dict(**base_cfg, y_reward_cfgs=chk_cfgs)

     nbits = 4
-    total_shards = 2 ** nbits
+    total_shards = 2**nbits
     if target_num > total_shards:
         shards = _bisect_nbits(nbits)
         for i, shard_num in zip(range(total_shards), shards):
2 changes: 1 addition & 1 deletion src/evaluating_rewards/distances/epic_sample.py
@@ -153,7 +153,7 @@ def sample_mean_rews(
     mean_from_obs: np.ndarray,
     act_samples: np.ndarray,
     next_obs_samples: np.ndarray,
-    batch_size: int = 2 ** 28,
+    batch_size: int = 2**28,
 ) -> Mapping[K, np.ndarray]:
     """
     Estimates the mean reward from observations `mean_from_obs` using given samples.
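Only the formatting of the default `batch_size` changes here, but the signature hints at the computation: each observation in `mean_from_obs` is paired with the sampled actions and next observations, and rewards are averaged in chunks of at most `batch_size` transitions. A simplified, hypothetical sketch of that batching for a single vectorized reward function (the repo's version returns a mapping over several reward models and differs in detail):

import numpy as np

def sample_mean_rews_sketch(reward_fn, mean_from_obs, act_samples, next_obs_samples, batch_size=2**28):
    """Schematic only: mean reward per observation, averaged over sampled (action, next-obs) pairs."""
    n_samples = len(act_samples)
    obs_per_batch = max(1, batch_size // n_samples)
    means = []
    for start in range(0, len(mean_from_obs), obs_per_batch):
        obs_chunk = mean_from_obs[start : start + obs_per_batch]
        # Pair every obs in the chunk with every sampled (action, next_obs).
        obs_rep = np.repeat(obs_chunk, n_samples, axis=0)
        acts_rep = np.tile(act_samples, (len(obs_chunk), 1))
        next_obs_rep = np.tile(next_obs_samples, (len(obs_chunk), 1))
        rews = reward_fn(obs_rep, acts_rep, next_obs_rep)
        means.append(rews.reshape(len(obs_chunk), n_samples).mean(axis=1))
    return np.concatenate(means)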
4 changes: 3 additions & 1 deletion src/evaluating_rewards/distances/npec.py
@@ -67,6 +67,7 @@ def fit_affine(self, batch: types.Transitions):

     def fit(
         self,
+        *,
         dataset: datasets.TransitionsCallable,
         affine_size: Optional[int] = 4096,
         **kwargs,
@@ -136,6 +137,7 @@ def fit_affine(self, batch: types.Transitions) -> base.AffineParameters:

     def fit(
         self,
+        *,
         dataset: datasets.TransitionsCallable,
         total_timesteps: int = int(1e6),
         epoch_timesteps: int = 16384,
@@ -168,7 +170,7 @@ def fit(
             logging.info(f"Epoch {epoch}: {affine_stats}")

             epoch_stats = super().fit(
-                dataset, total_timesteps=epoch_timesteps, affine_size=None, **kwargs
+                dataset=dataset, total_timesteps=epoch_timesteps, affine_size=None, **kwargs
             )

             for k, v in epoch_stats.items():
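The bare `*` added to both `fit` signatures makes `dataset` and every later parameter keyword-only, which is why the `super().fit(...)` call above (and the trainer call in train_regress.py below) now spells out `dataset=...`. A minimal, self-contained sketch of the same pattern; the names are illustrative, not taken from the repo:

def fit(*, dataset, total_timesteps=16384):
    """Everything after the bare * must be passed by keyword."""
    return f"fit on {dataset} for {total_timesteps} steps"

fit(dataset="transitions", total_timesteps=1024)  # OK
# fit("transitions", 1024)  # TypeError: positional arguments are rejected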
12 changes: 6 additions & 6 deletions src/evaluating_rewards/envs/point_mass.py
@@ -79,17 +79,17 @@ def initial_state(self):
                 break
         return {"pos": pos, "vel": vel, "goal": goal}

-    def transition(self, old_state, action):
+    def transition(self, state, action):
         action = np.array(action)
         action = action.clip(-1, 1)
         return {
-            "pos": old_state["pos"] + self.dt * old_state["vel"],
-            "vel": old_state["vel"] + self.dt * action,
-            "goal": old_state["goal"],
+            "pos": state["pos"] + self.dt * state["vel"],
+            "vel": state["vel"] + self.dt * action,
+            "goal": state["goal"],
         }

-    def reward(self, old_state, action, new_state):
-        del old_state
+    def reward(self, state, action, new_state):
+        del state
         dist = np.linalg.norm(new_state["pos"] - new_state["goal"])
         ctrl_penalty = np.dot(action, action)
         return -dist - self.ctrl_coef * ctrl_penalty
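The rename from `old_state` to `state` is cosmetic; the dynamics stay a clipped double integrator with a goal-distance reward minus a quadratic control penalty. A standalone sketch of one step of those equations (the `dt` and `ctrl_coef` values here are assumed for illustration, not the repo's defaults):

import numpy as np

dt, ctrl_coef = 0.01, 0.1  # assumed values, for illustration only
state = {"pos": np.array([1.0, 0.0]), "vel": np.zeros(2), "goal": np.zeros(2)}
action = np.clip(np.array([0.5, -0.2]), -1, 1)

# One Euler step of the transition shown in the diff above.
new_state = {
    "pos": state["pos"] + dt * state["vel"],
    "vel": state["vel"] + dt * action,
    "goal": state["goal"],
}
# Reward: negative distance to goal minus a control penalty.
reward = -np.linalg.norm(new_state["pos"] - new_state["goal"]) - ctrl_coef * np.dot(action, action)
print(reward)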
2 changes: 1 addition & 1 deletion src/evaluating_rewards/experiments/synthetic.py
@@ -225,7 +225,7 @@ def compare_synthetic(
     model_potential: bool = True,
     discount: float = 0.99,
     optimizer: Type[tf.train.Optimizer] = tf.train.AdamOptimizer,
-    total_timesteps: int = 2 ** 16,
+    total_timesteps: int = 2**16,
     batch_size: int = 128,
     test_size: int = 4096,
     pretrain: bool = True,
@@ -388,7 +388,7 @@ def _fixed_width_format(x: float, figs: int = 3) -> str:
             num_leading_zeros += 1
         else:
             break
-    if x >= 10 ** figs:
+    if x >= 10**figs:
         # No decimal point gives us an extra character to use
         figs += 1
     fstr = "{:." + str(max(0, figs - num_leading_zeros)) + "g}"
4 changes: 2 additions & 2 deletions src/evaluating_rewards/scripts/rewards/train_regress.py
@@ -32,7 +32,7 @@ def default_config():
     locals().update(**regress_utils.DEFAULT_CONFIG)
     checkpoint_interval = 50  # save every checkpoint_interval epochs
     dataset_factory = datasets.transitions_factory_from_serialized_policy
-    dataset_factory_kwargs = dict()
+    dataset_factory_kwargs = {}

     # Model to train and hyperparameters
     model_reward_type = base.MLPRewardModel
@@ -104,7 +104,7 @@ def make_trainer(model, model_scope, target):
 def do_training(target, trainer, callback: Optional[base.Callback]):
     del target
     return trainer.fit(
-        dataset_generator,
+        dataset=dataset_generator,
         total_timesteps=total_timesteps,
         batch_size=batch_size,
         callback=callback,
3 changes: 2 additions & 1 deletion src/evaluating_rewards/scripts/script_utils.py
@@ -87,7 +87,8 @@ def recursive_dict_merge(
             elif overwrite:
                 dest[key] = update_by[key]
             else:
-                raise Exception("Conflict at {}".format(".".join(path + [str(key)])))
+                msg = "Conflict at " + ".".join(path + [str(key)])
+                raise Exception(msg)
         else:
             dest[key] = update_by[key]
     return dest
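For readers unfamiliar with the helper, the behaviour being touched here is a recursive merge that raises on conflicting leaf values unless `overwrite` is set. A minimal re-implementation sketch of that behaviour (simplified; not the repo's exact code or signature):

def recursive_dict_merge(dest, update_by, overwrite=False, path=None):
    """Merge update_by into dest in place, recursing into nested dicts."""
    path = path or []
    for key in update_by:
        if key in dest:
            if isinstance(dest[key], dict) and isinstance(update_by[key], dict):
                recursive_dict_merge(dest[key], update_by[key], overwrite, path + [str(key)])
            elif overwrite:
                dest[key] = update_by[key]
            else:
                msg = "Conflict at " + ".".join(path + [str(key)])
                raise Exception(msg)
        else:
            dest[key] = update_by[key]
    return dest

print(recursive_dict_merge({"a": {"b": 1}}, {"a": {"c": 2}}))    # {'a': {'b': 1, 'c': 2}}
print(recursive_dict_merge({"a": 1}, {"a": 2}, overwrite=True))  # {'a': 2}
# recursive_dict_merge({"a": 1}, {"a": 2})  # Exception: Conflict at a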
2 changes: 1 addition & 1 deletion tests/test_rewards.py
@@ -34,7 +34,7 @@
 from evaluating_rewards.rewards import base
 from tests import common

-ENVS = ["FrozenLake-v0", "CartPole-v1", "Pendulum-v0"]
+ENVS = ["FrozenLake-v1", "CartPole-v1", "Pendulum-v1"]

 STANDALONE_REWARD_MODELS = {
     "halfcheetah_ground_truth": {
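The ID bump follows the gym pin above: the pinned gym release registers the `-v1` variants of FrozenLake and Pendulum, so the test constants move off the old `-v0` names. A quick local sanity check one could run (not part of the test suite):

import gym  # gym~=0.21.0, per the requirements.txt pin above

for env_id in ["FrozenLake-v1", "CartPole-v1", "Pendulum-v1"]:
    env = gym.make(env_id)  # raises gym.error.Error if the ID is not registered
    env.reset()
    env.close()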
8 changes: 4 additions & 4 deletions tests/test_synthetic.py
@@ -109,7 +109,7 @@ def f(total_timesteps: int):
         "dataset_potential_hids": [],
         "model_potential_hids": [],
         "learning_rate": 1e-2,
-        "total_timesteps": 2 ** 18,
+        "total_timesteps": 2**18,
         "batch_size": 256,
     },
     "rel_upperbound": 0.2,
@@ -120,7 +120,7 @@ def f(total_timesteps: int):
         "dataset_potential_hids": [4],
         "model_potential_hids": [32],
         "learning_rate": 1e-2,
-        "total_timesteps": 2 ** 18,
+        "total_timesteps": 2**18,
         "batch_size": 512,
     },
     "rel_upperbound": 0.2,
@@ -131,7 +131,7 @@ def f(total_timesteps: int):
         "dataset_potential_hids": [4, 4],
         "model_potential_hids": [32, 32],
         "learning_rate": 1e-2,
-        "total_timesteps": 2 ** 18,
+        "total_timesteps": 2**18,
         "batch_size": 512,
     },
     "rel_upperbound": 0.2,
@@ -283,7 +283,7 @@ def test_pretrain_affine(self, helper_affine, kwargs):
         dataset_potential_hids=[4, 4],
         model_potential=True,
         model_potential_hids=[32, 32],
-        total_timesteps=2 ** 18,
+        total_timesteps=2**18,
         learning_rate=1e-2,
         potential_noise=np.array([0.0, 1.0]),
         **kwargs,