Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 006595a

Browse files
Merge pull request #523 from openclimatefix/issue/425-start-end-config
add start and end date
2 parents 4f8b7b4 + f06c064 commit 006595a

File tree

6 files changed

+73
-34
lines changed

6 files changed

+73
-34
lines changed

nowcasting_dataset/config/model.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
are used to validate the values of the data itself.
1212
1313
"""
14+
import logging
1415
from datetime import datetime
1516
from pathlib import Path
1617
from typing import Optional, Union
@@ -32,6 +33,8 @@
3233
IMAGE_SIZE_PIXELS_FIELD = Field(64, description="The number of pixels of the region of interest.")
3334
METERS_PER_PIXEL_FIELD = Field(2000, description="The number of meters per pixel.")
3435

36+
logger = logging.getLogger(__name__)
37+
3538

3639
class General(BaseModel):
3740
"""General pydantic model"""
@@ -86,7 +89,42 @@ def seq_length_60_minutes(self):
8689
return int((self.history_minutes + self.forecast_minutes) / 60 + 1)
8790

8891

89-
class PV(DataSourceMixin):
92+
class StartEndDatetimeMixin(BaseModel):
    """Mixin class to add start and end date

    Adds the ``start_datetime`` and ``end_datetime`` fields to a configuration
    model and validates that the start is strictly before the end.
    """

    # NOTE(review): both descriptions say "If None", yet the fields are annotated
    # as plain `datetime`, and the end_datetime text refers to
    # InputData.start_date, which looks like a copy-paste slip. The strings are
    # left unchanged here -- confirm the intended wording.
    start_datetime: datetime = Field(
        datetime(2020, 1, 1),
        description="Load date from data sources from this date. "
        "If None, this will get overwritten by InputData.start_date. ",
    )
    end_datetime: datetime = Field(
        datetime(2021, 9, 1),
        description="Load date from data sources up to this date. "
        "If None, this will get overwritten by InputData.start_date. ",
    )

    @root_validator
    def check_start_and_end_datetime(cls, values):
        """
        Make sure start datetime is before end datetime

        Args:
            values: dict of the field values of the model being validated.

        Returns:
            The unchanged ``values`` dict.

        Raises:
            ValueError: if ``start_datetime`` is not strictly before ``end_datetime``.
        """

        # Use .get(): a field that already failed its own validation may be
        # absent from `values`; indexing would raise a KeyError and hide the
        # original validation error.
        start_datetime = values.get("start_datetime")
        end_datetime = values.get("end_datetime")

        if start_datetime is None or end_datetime is None:
            # Nothing to compare; leave the report to field-level validation.
            return values

        # check start datetime is less than end datetime
        if start_datetime >= end_datetime:
            message = (
                f"Start datetime ({start_datetime}) "
                f"should be less than end datetime ({end_datetime})"
            )
            logger.error(message)
            # Previously `assert Exception(message)`: an exception instance is
            # always truthy, so the check never failed. Raise explicitly instead.
            raise ValueError(message)

        return values
125+
126+
127+
class PV(DataSourceMixin, StartEndDatetimeMixin):
90128
"""PV configuration model"""
91129

92130
pv_filename: str = Field(
@@ -163,7 +201,7 @@ class NWP(DataSourceMixin):
163201
nwp_meters_per_pixel: int = METERS_PER_PIXEL_FIELD
164202

165203

166-
class GSP(DataSourceMixin):
204+
class GSP(DataSourceMixin, StartEndDatetimeMixin):
167205
"""GSP configuration model"""
168206

169207
gsp_zarr_path: str = Field("gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr")
@@ -372,8 +410,12 @@ class Process(BaseModel):
372410

373411
@validator("local_temp_path")
374412
def local_temp_path_to_path_object_expanduser(cls, v):
375-
"""Convert the path in string format to a `pathlib.PosixPath` object
376-
and call `expanduser` on the latter."""
413+
"""
414+
Convert temp path to Path
415+
416+
Convert the path in string format to a `pathlib.PosixPath` object
417+
and call `expanduser` on the latter.
418+
"""
377419
return Path(v).expanduser()
378420

379421

nowcasting_dataset/data_sources/gsp/gsp_data_source.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,9 @@ class GSPDataSource(ImageDataSource):
4141
# zarr_path of where the gsp data is stored
4242
zarr_path: Union[str, Path]
4343
# start datetime, this can be None
44-
# TODO: Issue #425: Use config to set start_dt and end_dt.
45-
start_dt: Optional[datetime] = pd.Timestamp("2020-01-01")
44+
start_datetime: Optional[datetime] = None
4645
# end datetime, this can be None
47-
# TODO: Issue #425: Use config to set start_dt and end_dt.
48-
end_dt: Optional[datetime] = pd.Timestamp("2022-01-01")
46+
end_datetime: Optional[datetime] = None
4947
# the threshold: we only take GSPs with a maximum power above this value.
5048
threshold_mw: int = 0
5149
# get the data for the gsp at the center too.
@@ -68,8 +66,6 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
6866
Set random seed and load data
6967
"""
7068
super().__post_init__(image_size_pixels, meters_per_pixel)
71-
# TODO: Issue #425: Remove this logger warning.
72-
logger.warning("GSPDataSource is using hard-coded start_dt and end_dt!")
7369
self.rng = np.random.default_rng()
7470
self.load()
7571

@@ -102,7 +98,7 @@ def load(self):
10298

10399
# load gsp data from file / gcp
104100
self.gsp_power, self.gsp_capacity = load_solar_gsp_data(
105-
self.zarr_path, start_dt=self.start_dt, end_dt=self.end_dt
101+
self.zarr_path, start_dt=self.start_datetime, end_dt=self.end_datetime
106102
)
107103

108104
# drop any gsp below a threshold mw. This is to get rid of any small GSP where

nowcasting_dataset/data_sources/pv/pv_data_source.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ class PVDataSource(ImageDataSource):
3535
filename: Union[str, Path]
3636
metadata_filename: Union[str, Path]
3737
# TODO: Issue #425: Use config to set start_dt and end_dt.
38-
start_dt: Optional[datetime.datetime] = pd.Timestamp("2020-01-01")
39-
end_dt: Optional[datetime.datetime] = pd.Timestamp("2022-01-01")
38+
start_datetime: Optional[datetime.datetime] = None
39+
end_datetime: Optional[datetime.datetime] = None
4040
random_pv_system_for_given_location: Optional[bool] = True
4141
#: Each example will always have this many PV systems.
4242
#: If less than this number exist in the data then pad with NaNs.
@@ -48,8 +48,7 @@ class PVDataSource(ImageDataSource):
4848
def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
4949
"""Post Init"""
5050
super().__post_init__(image_size_pixels, meters_per_pixel)
51-
# TODO: Issue #425: Remove this logger warning.
52-
logger.warning("PVDataSource is using hard-coded start_dt and end_dt!")
51+
5352
self.rng = np.random.default_rng()
5453
self.load()
5554

@@ -101,7 +100,9 @@ def _load_pv_power(self):
101100

102101
logger.debug(f"Loading PV Power data from {self.filename}")
103102

104-
pv_power = load_solar_pv_data(self.filename, start_dt=self.start_dt, end_dt=self.end_dt)
103+
pv_power = load_solar_pv_data(
104+
self.filename, start_dt=self.start_datetime, end_dt=self.end_datetime
105+
)
105106

106107
# A bit of hand-crafted cleaning
107108
if 30248 in pv_power.columns:

tests/data_sources/gsp/test_gsp_data_source.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ def test_gsp_pv_data_source_init():
1818

1919
_ = GSPDataSource(
2020
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
21-
start_dt=datetime(2020, 4, 1),
22-
end_dt=datetime(2020, 4, 2),
21+
start_datetime=datetime(2020, 4, 1),
22+
end_datetime=datetime(2020, 4, 2),
2323
history_minutes=30,
2424
forecast_minutes=60,
2525
image_size_pixels=64,
@@ -33,8 +33,8 @@ def test_gsp_pv_data_source_get_locations():
3333

3434
gsp = GSPDataSource(
3535
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
36-
start_dt=datetime(2020, 4, 1),
37-
end_dt=datetime(2020, 4, 2),
36+
start_datetime=datetime(2020, 4, 1),
37+
end_datetime=datetime(2020, 4, 2),
3838
history_minutes=30,
3939
forecast_minutes=60,
4040
image_size_pixels=64,
@@ -65,8 +65,8 @@ def test_gsp_pv_data_source_get_example():
6565

6666
gsp = GSPDataSource(
6767
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
68-
start_dt=datetime(2020, 4, 1),
69-
end_dt=datetime(2020, 4, 2),
68+
start_datetime=datetime(2020, 4, 1),
69+
end_datetime=datetime(2020, 4, 2),
7070
history_minutes=30,
7171
forecast_minutes=60,
7272
image_size_pixels=64,
@@ -91,8 +91,8 @@ def test_gsp_pv_data_source_get_batch():
9191

9292
gsp = GSPDataSource(
9393
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
94-
start_dt=datetime(2020, 4, 1),
95-
end_dt=datetime(2020, 4, 2),
94+
start_datetime=datetime(2020, 4, 1),
95+
end_datetime=datetime(2020, 4, 2),
9696
history_minutes=30,
9797
forecast_minutes=60,
9898
image_size_pixels=64,
@@ -121,8 +121,8 @@ def test_drop_gsp_north_of_boundary(test_data_folder):
121121

122122
gsp = GSPDataSource(
123123
zarr_path=f"{test_data_folder}/gsp/test.zarr",
124-
start_dt=datetime(2020, 4, 1),
125-
end_dt=datetime(2020, 4, 2),
124+
start_datetime=datetime(2020, 4, 1),
125+
end_datetime=datetime(2020, 4, 2),
126126
history_minutes=30,
127127
forecast_minutes=60,
128128
image_size_pixels=64,

tests/data_sources/test_pv_data_source.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def test_get_example_and_batch(): # noqa: D103
4242
meters_per_pixel=2000,
4343
filename=PV_DATA_FILENAME,
4444
metadata_filename=PV_METADATA_FILENAME,
45-
start_dt=datetime.fromisoformat("2020-04-01 00:00:00.000"),
46-
end_dt=datetime.fromisoformat("2020-04-02 00:00:00.000"),
45+
start_datetime=datetime.fromisoformat("2020-04-01 00:00:00.000"),
46+
end_datetime=datetime.fromisoformat("2020-04-02 00:00:00.000"),
4747
load_azimuth_and_elevation=False,
4848
load_from_gcs=False,
4949
)
@@ -75,8 +75,8 @@ def test_passive():
7575
pv = PVDataSource(
7676
filename=filename,
7777
metadata_filename=filename_metadata,
78-
start_dt=datetime(2020, 3, 28),
79-
end_dt=datetime(2020, 4, 1),
78+
start_datetime=datetime(2020, 3, 28),
79+
end_datetime=datetime(2020, 4, 1),
8080
history_minutes=60,
8181
forecast_minutes=30,
8282
image_size_pixels=64,

tests/test_manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def test_sample_spatial_and_temporal_locations_for_examples(): # noqa: D103
3838

3939
gsp = GSPDataSource(
4040
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
41-
start_dt=datetime(2020, 4, 1),
42-
end_dt=datetime(2020, 4, 2),
41+
start_datetime=datetime(2020, 4, 1),
42+
end_datetime=datetime(2020, 4, 2),
4343
history_minutes=30,
4444
forecast_minutes=60,
4545
image_size_pixels=64,
@@ -139,8 +139,8 @@ def test_batches():
139139

140140
gsp = GSPDataSource(
141141
zarr_path=filename,
142-
start_dt=datetime(2020, 4, 1),
143-
end_dt=datetime(2020, 4, 2),
142+
start_datetime=datetime(2020, 4, 1),
143+
end_datetime=datetime(2020, 4, 2),
144144
history_minutes=30,
145145
forecast_minutes=60,
146146
image_size_pixels=64,

0 commit comments

Comments
 (0)