Skip to content
This repository was archived by the owner on Sep 7, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/experiment/perceiver_conv3d_sat_nwp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ defaults:
# this allows you to overwrite only specified parameters

seed: 518
# checkpoint that training.py loads into the model when this key is present
load_model: /home/ec2-user/github/predict_pv_yield/logs/runs/2021-12-13/14-52-11/checkpoints/epoch_006.ckpt

trainer:
min_epochs: 1
Expand Down
8 changes: 5 additions & 3 deletions configs/model/perceiver_conv3d_sat_nwp.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
_target_: predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat.Model

forecast_minutes: 30
history_minutes: 60
batch_size: 32
forecast_minutes: 120
history_minutes: 30
batch_size: 16
num_latents: 24
latent_dim: 24
embedding_dem: 0
output_variable: gsp_yield
conv3d_channels: 8
use_future_satellite_images: 0
include_pv_or_gsp_yield_history: False
include_pv_yield_history: True
16 changes: 16 additions & 0 deletions experiments/2021-12/2021-12-01.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
1. Compare the model without future satellite data against the model with future satellite data

For conv3d model

With: https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-550/charts
Not: https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-549/charts

Validation Error
With: 0.0665
Not: 0.0670

data:
nwp
sat (no hrv, no future)
no gsp history
pv history
13 changes: 13 additions & 0 deletions experiments/2021-12/2021-12-02.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-571/monitoring

run Perceiver model

data
satdata (no future)
nwp
no gsp
historical pv

epoch time ~ 1.5 hours

Validation
24 changes: 18 additions & 6 deletions predict_pv_yield/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ def training_step(self, batch, batch_idx):
return self._training_or_validation_step(batch, tag="Train")

def validation_step(self, batch: BatchML, batch_idx):
    """Run one validation batch through the shared validation/test logic.

    Args:
        batch: the batch to evaluate; forwarded unchanged.
        batch_idx: index of the batch; forwarded unchanged.

    Returns:
        Whatever ``validation_or_test_step`` returns, so the result is not
        silently dropped before it reaches the epoch-end hook.
    """
    return self.validation_or_test_step(batch, batch_idx)

def test_step(self, batch: BatchML, batch_idx):
    """Run one test batch through the shared validation/test logic.

    Args:
        batch: the batch to evaluate; forwarded unchanged.
        batch_idx: index of the batch; forwarded unchanged.

    Returns:
        Whatever ``validation_or_test_step`` returns, so the result is not
        silently dropped before it reaches the epoch-end hook.
    """
    return self.validation_or_test_step(batch, batch_idx)

def validation_or_test_step(self, batch: BatchML, batch_idx):

if type(batch) == dict:
batch = BatchML(**batch)
Expand Down Expand Up @@ -220,18 +226,18 @@ def validation_step(self, batch: BatchML, batch_idx):
pass

# save validation results
capacity = batch.gsp.gsp_capacity[:,-self.forecast_len_30:,0].cpu().numpy()
capacity = batch.gsp.gsp_capacity[0 : self.batch_size,-self.forecast_len_30:,0].cpu().numpy()
predictions = model_output.cpu().numpy()
truths = batch.gsp.gsp_yield[:, -self.forecast_len_30:, 0].cpu().numpy()
truths = batch.gsp.gsp_yield[0 : self.batch_size, -self.forecast_len_30:, 0].cpu().numpy()
predictions = predictions * capacity
truths = truths * capacity

results = make_validation_results(truths_mw=truths,
predictions_mw=predictions,
capacity_mwp=capacity,
gsp_ids=batch.gsp.gsp_id[:, 0].cpu(),
gsp_ids=batch.gsp.gsp_id[0:self.batch_size, 0].cpu(),
batch_idx=batch_idx,
t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc))
t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc)[0:self.batch_size])

# append so in 'validation_epoch_end' the file is saved
if batch_idx == 0:
Expand All @@ -249,8 +255,14 @@ def validation_epoch_end(self, outputs):
current_epoch=self.current_epoch,
logger=self.logger)

def test_step(self, batch, batch_idx):
self._training_or_validation_step(batch, tag="Test")
def test_epoch_end(self, outputs):
    """Log the end of the test epoch and pass the collected results to the logger.

    Args:
        outputs: accepted for the hook signature; not used here.
    """
    logger.info("Test epoch end")

    # gather the arguments first, then hand them over in a single call
    save_kwargs = dict(
        results_dfs=self.results_dfs,
        results_file_name=self.results_file_name,
        current_epoch=self.current_epoch,
        logger=self.logger,
    )
    save_validation_results_to_logger(**save_kwargs)


def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=0.0005)
Expand Down
54 changes: 42 additions & 12 deletions predict_pv_yield/models/perceiver/perceiver_conv3d_nwp_sat.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def __init__(
embedding_dem: int = 16,
output_variable: str = "pv_yield",
conv3d_channels: int = 16,
use_future_satellite_images: bool = True, # option not to use future sat images
use_future_satellite_images: bool = False, # option not to use future sat images
include_pv_or_gsp_yield_history: bool = False, # option to feed the history of the output variable to the encoder
include_pv_yield_history: bool = True, # option to feed the pv yield history to the encoder (a flag, so bool not int)
):
"""
Idea is to have a conv3d (+max pool) layer before both sat and nwp data go into perceiver model.
Expand All @@ -86,6 +88,8 @@ def __init__(
self.embedding_dem = embedding_dem
self.output_variable = output_variable
self.use_future_satellite_images = use_future_satellite_images
self.include_pv_yield_history = include_pv_yield_history
self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history

super().__init__()

Expand Down Expand Up @@ -115,10 +119,16 @@ def __init__(
if self.embedding_dem:
self.pv_system_id_embedding = nn.Embedding(num_embeddings=940, embedding_dim=self.embedding_dem)

rnn_input_size = FC_OUTPUT_SIZE
if self.include_pv_or_gsp_yield_history:
rnn_input_size += 1
if self.include_pv_yield_history:
rnn_input_size += 128

# TODO: Get rid of RNNs!
self.encoder_rnn = nn.GRU(
# plus 1 for history
input_size=FC_OUTPUT_SIZE + 1,
input_size=rnn_input_size,
hidden_size=RNN_HIDDEN_SIZE,
num_layers=2,
batch_first=True,
Expand Down Expand Up @@ -169,8 +179,17 @@ def forward(self, x):

nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans)

assert nwp_width == width, f'widths should be the same({nwp_width},{width})'
assert nwp_height == height, f'heights should be the same({nwp_height},{height})'
# In v15 the satellite width and height are a lot smaller than the NWP ones, so zero-pad
# the sat data up to the NWP size. There should be a better way.
# first pad along the width (dim=1) ...
sat_data_zeros = torch.zeros(size=(new_batch_size, nwp_width - width, height, n_chans),
device=sat_data.device)
sat_data = torch.cat([sat_data, sat_data_zeros], dim=1)
# ... then along the height (dim=2); the width is already nwp_width at this point
sat_data_zeros = torch.zeros(size=(new_batch_size, nwp_width, nwp_height - height, n_chans),
device=sat_data.device)
sat_data = torch.cat([sat_data, sat_data_zeros], dim=2)
new_batch_size, sat_width, sat_height, sat_n_chans = sat_data.shape

# compare width with width (this used to compare nwp_width with sat_height,
# which fails for any non-square grid)
assert nwp_width == sat_width, f'widths should be the same({nwp_width},{sat_width})'
assert nwp_height == sat_height, f'heights should be the same({nwp_height},{sat_height})'

data = torch.cat((sat_data, nwp_data), dim=-1)

Expand Down Expand Up @@ -214,14 +233,25 @@ def forward(self, x):
dim=2,
)

if self.output_variable == 'pv_yield':
# take the history of the pv yield of this system,
pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2)
elif self.output_variable == 'gsp_yield':
# take the history of the gsp yield of this system,
gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float()
encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2)
# Default to the plain features so `encoder_input` is always defined, even when both
# history options are switched off (previously that case raised at the encoder call).
encoder_input = rnn_input

if self.include_pv_or_gsp_yield_history:
    if self.output_variable == 'pv_yield':
        # take the history of the pv yield of this system,
        pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float()
        encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2)
    elif self.output_variable == 'gsp_yield':
        # take the history of the gsp yield of this system,
        gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float()
        encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2)

# add the pv yield history. This can be used if trying to predict gsp
# NOTE(review): this branch rebuilds `encoder_input` from `rnn_input`, so it replaces
# whatever the branch above produced when both options are enabled - confirm intended.
if self.include_pv_yield_history:
    pv_yield_history = (
        x.pv.pv_yield[:self.batch_size].nan_to_num(nan=0.0).float()
    )
    # remove future pv
    pv_yield_history[:, self.history_len_5 + 1:] = 0.0

    encoder_input = torch.cat((rnn_input, pv_yield_history), dim=2)

encoder_output, encoder_hidden = self.encoder_rnn(encoder_input)
decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden)
Expand Down
6 changes: 4 additions & 2 deletions predict_pv_yield/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,17 @@ def train(config: DictConfig) -> Optional[float]:

# Train the model
log.info("Starting training!")
if 'validate_only' in config:
if 'load_model' in config:
    # membership test on the config keys (`is` compared object identity and was always
    # False); this branch only loads the checkpoint, it does not fit or validate
    model = model.load_from_checkpoint(checkpoint_path=config['load_model'])
elif 'validate_only' in config:
    trainer.validate(model=model, datamodule=datamodule)
else:
    trainer.fit(model=model, datamodule=datamodule)

# Evaluate model on test set, using the best model achieved during training
if config.get("test_after_training") and not config.trainer.get("fast_dev_run"):
log.info("Starting testing!")
trainer.test()
trainer.test(model=model, datamodule=datamodule)

# Make sure everything closed properly
log.info("Finalizing!")
Expand Down