Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit c9d3603

Browse files
Merge pull request #275 from openclimatefix/v10
V10
2 parents 6c42c54 + 5cf24da commit c9d3603

File tree

14 files changed

+117
-44
lines changed

14 files changed

+117
-44
lines changed

nowcasting_dataset/config/on_premises.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ input_data:
5656
topographic_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/Topographic/europe_dem_1km_osgb.tif
5757

5858
output_data:
59-
filepath: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v_testing/
59+
filepath: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v10/
6060
process:
6161
batch_size: 32
6262
seed: 1234

nowcasting_dataset/data_sources/datasource_output.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from pathlib import Path
77

88
import numpy as np
9+
import xarray as xr
10+
from xarray.ufuncs import isinf, isnan
911

1012
from nowcasting_dataset.dataset.xr_utils import PydanticXArrayDataSet
1113
from nowcasting_dataset.filesystem.utils import makedirs
@@ -50,6 +52,54 @@ def save_netcdf(self, batch_i: int, path: Path):
5052
encoding = {name: {"compression": "lzf"} for name in self.data_vars}
5153
self.to_netcdf(local_filename, engine="h5netcdf", mode="w", encoding=encoding)
5254

55+
def check_nan_and_inf(self, data: "xr.Dataset", variable_name: str = None):
    """Check that no value in ``data`` is NaN or infinite.

    Args:
        data: The array-like data to validate.
            NOTE(review): the callers visible in this change pass ``v.data`` /
            ``v.elevation`` / ``v.azimuth``, which look like single variables
            rather than a whole ``xr.Dataset`` - confirm the intended type.
        variable_name: Optional label appended to the error message in
            parentheses, so the failing variable can be identified.

    Raises:
        Exception: If any value is NaN, or (checked second) any value is
            infinite. The message is logged at error level before raising.
    """
    # NumPy ufuncs are applied directly; NaNs are checked before infinities,
    # so data containing both is reported as containing NaNs.
    for is_invalid, description in ((np.isnan, "NaNs"), (np.isinf, "Infinite")):
        if is_invalid(data).any():
            message = f"Some {self.__class__.__name__} data values are {description}"
            # Only append the label when one was given. Concatenating ``None``
            # onto the message would raise a TypeError and hide the real error.
            if variable_name is not None:
                message += f" ({variable_name})"
            logger.error(message)
            raise Exception(message)
69+
70+
def check_dataset_greater_than_or_equal_to(
    self, data: "xr.Dataset", min_value: int, variable_name: str = None
):
    """Check that every value in ``data`` is greater than or equal to ``min_value``.

    Args:
        data: The array-like data to validate.
        min_value: Smallest allowed value (inclusive).
        variable_name: Optional label appended to the error message in
            parentheses, so the failing variable can be identified.

    Raises:
        Exception: If any value is below ``min_value``. The message is logged
            at error level before raising.
    """
    if (data < min_value).any():
        message = f"Some {self.__class__.__name__} data values are less than {min_value}"
        # Only append the label when one was given. Concatenating ``None``
        # onto the message would raise a TypeError and hide the real error.
        if variable_name is not None:
            message += f" ({variable_name})"
        logger.error(message)
        raise Exception(message)
79+
80+
def check_dataset_less_than_or_equal_to(
    self, data: "xr.Dataset", max_value: int, variable_name: str = None
):
    """Check that every value in ``data`` is less than or equal to ``max_value``.

    Args:
        data: The array-like data to validate.
        max_value: Largest allowed value (inclusive).
        variable_name: Optional label appended to the error message in
            parentheses, so the failing variable can be identified.

    Raises:
        Exception: If any value is above ``max_value``. The message is logged
            at error level before raising.
    """
    if (data > max_value).any():
        # The offending values exceed the maximum, so the message says
        # "greater than" (it previously said "less than").
        message = f"Some {self.__class__.__name__} data values are greater than {max_value}"
        # Only append the label when one was given. Concatenating ``None``
        # onto the message would raise a TypeError and hide the real error.
        if variable_name is not None:
            message += f" ({variable_name})"
        logger.error(message)
        raise Exception(message)
89+
90+
def check_dataset_not_equal(
    self, data: "xr.Dataset", value: int, raise_error: bool = True, variable_name: str = None
):
    """Check that no value in ``data`` is equal to ``value``.

    Equality is tested with ``np.isclose`` using its default tolerances, so
    values very close to ``value`` are treated as equal too.

    Args:
        data: The array-like data to validate.
        value: The value that must not appear in ``data``.
        raise_error: If True, a match is logged at error level and raised.
            If False, a match is only logged as a warning.
        variable_name: Optional label appended to the message in parentheses,
            so the failing variable can be identified.

    Raises:
        Exception: If any value matches ``value`` and ``raise_error`` is True.
    """
    if not np.isclose(data, value).any():
        return

    message = f"Some {self.__class__.__name__} data values are equal to {value}"
    # Only append the label when one was given. Concatenating ``None`` onto the
    # message would raise a TypeError, even in warn-only mode.
    if variable_name is not None:
        message += f" ({variable_name})"

    if raise_error:
        logger.error(message)
        raise Exception(message)
    logger.warning(message)
102+
53103

54104
def pad_nans(array, pad_width) -> np.ndarray:
55105
"""Pad nans with nans"""

nowcasting_dataset/data_sources/gsp/gsp_data_source.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def _get_time_slice(self, t0_dt: pd.Timestamp) -> [pd.DataFrame]:
348348
# remove any nans
349349
power = power.dropna(axis="columns", how="any")
350350

351-
logger.debug(f"Found {len(power.columns)} GSP")
351+
logger.debug(f"Found {len(power.columns)} GSP valid data for {t0_dt}")
352352

353353
return power
354354

nowcasting_dataset/data_sources/gsp/gsp_model.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
""" Model for output of GSP data """
22
import logging
33

4-
from xarray.ufuncs import isinf, isnan
5-
64
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
75

86
logger = logging.getLogger(__name__)
@@ -17,8 +15,8 @@ class GSP(DataSourceOutput):
1715
@classmethod
1816
def model_validation(cls, v):
1917
""" Check that all values are non NaNs """
20-
assert (~isnan(v.data)).all(), "Some gsp data values are NaNs"
21-
assert (~isinf(v.data)).all(), "Some gsp data values are Infinite"
22-
assert (v.data >= 0).all(), f"Some gsp data values are below 0 {v.data.min()}"
18+
19+
v.check_nan_and_inf(data=v.data)
20+
v.check_dataset_greater_than_or_equal_to(data=v.data, min_value=0)
2321

2422
return v

nowcasting_dataset/data_sources/nwp/nwp_model.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
import logging
55

6-
from xarray.ufuncs import isinf, isnan
7-
86
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
97

108
logger = logging.getLogger(__name__)
@@ -21,6 +19,7 @@ class NWP(DataSourceOutput):
2119
@classmethod
2220
def model_validation(cls, v):
2321
""" Check that all values are not NaNs """
24-
assert (~isnan(v.data)).all(), "Some nwp data values are NaNs"
25-
assert (~isinf(v.data)).all(), "Some nwp data values are Infinite"
22+
23+
v.check_nan_and_inf(data=v.data)
24+
2625
return v

nowcasting_dataset/data_sources/pv/pv_data_source.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,15 @@ def _get_time_slice(self, t0_dt: pd.Timestamp) -> [pd.DataFrame]:
122122
end_dt = self._get_end_dt(t0_dt)
123123
del t0_dt # t0 is not used in the rest of this method!
124124
selected_pv_power = self.pv_power.loc[start_dt:end_dt].dropna(axis="columns", how="any")
125+
126+
pv_power_zero_or_above_flag = selected_pv_power.ge(0).all()
127+
128+
if pv_power_zero_or_above_flag.sum() != len(selected_pv_power.columns):
129+
n = len(selected_pv_power.columns) - pv_power_zero_or_above_flag.sum()
130+
logger.debug(f"Will be removing {n} pv systems as they have negative values")
131+
132+
selected_pv_power = selected_pv_power.loc[:, pv_power_zero_or_above_flag]
133+
125134
return selected_pv_power
126135

127136
def _get_central_pv_system_id(

nowcasting_dataset/data_sources/pv/pv_model.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
""" Model for output of PV data """
2-
from xarray.ufuncs import isinf, isnan
2+
3+
import logging
34

45
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
56

7+
logger = logging.getLogger(__name__)
8+
69

710
class PV(DataSourceOutput):
811
""" Class to store PV data as a xr.Dataset with some validation """
@@ -12,10 +15,9 @@ class PV(DataSourceOutput):
1215

1316
@classmethod
1417
def model_validation(cls, v):
15-
""" Check that all values are not Nan, Infinite, or < 0."""
16-
assert (~isnan(v.data)).all(), "Some pv data values are NaNs"
17-
assert (~isinf(v.data)).all(), "Some pv data values are Infinite"
18-
assert (v.data >= 0).all(), "Some pv data values are below 0"
18+
""" Check that all values are non NaNs """
19+
v.check_nan_and_inf(data=v.data)
20+
v.check_dataset_greater_than_or_equal_to(data=v.data, min_value=0)
1921

2022
assert v.time is not None
2123
assert v.x_coords is not None
Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
""" Model for output of satellite data """
22
from __future__ import annotations
33

4-
from xarray.ufuncs import isinf, isnan
4+
import logging
55

66
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
77

8+
logger = logging.getLogger(__name__)
9+
810

911
class Satellite(DataSourceOutput):
1012
""" Class to store satellite data as a xr.Dataset with some validation """
@@ -14,8 +16,8 @@ class Satellite(DataSourceOutput):
1416

1517
@classmethod
1618
def model_validation(cls, v):
17-
""" Check that all values are not NaN, Infinite, or -1."""
18-
assert (~isnan(v.data)).all(), "Some satellite data values are NaNs"
19-
assert (~isinf(v.data)).all(), "Some satellite data values are Infinite"
20-
assert (v.data != -1).all(), "Some satellite data values are -1's"
19+
""" Check that all values are non negative """
20+
v.check_nan_and_inf(data=v.data)
21+
# put this validation back in when issue is done
22+
v.check_dataset_not_equal(data=v.data, value=-1, raise_error=False)
2123
return v

nowcasting_dataset/data_sources/sun/sun_data_source.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
""" Loading Raw data """
2+
import logging
23
from dataclasses import dataclass
34
from datetime import datetime
45
from numbers import Number
@@ -14,6 +15,8 @@
1415
from nowcasting_dataset.data_sources.sun.sun_model import Sun
1516
from nowcasting_dataset.dataset.xr_utils import convert_data_array_to_dataset
1617

18+
logger = logging.getLogger(__name__)
19+
1720

1821
@dataclass
1922
class SunDataSource(DataSource):
@@ -85,6 +88,9 @@ def get_example(
8588
return Sun(sun)
8689

8790
def _load(self):
91+
92+
logger.info(f"Loading Sun data from {self.zarr_path}")
93+
8894
self.azimuth, self.elevation = load_from_zarr(
8995
zarr_path=self.zarr_path, start_dt=self.start_dt, end_dt=self.end_dt
9096
)
Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
""" Model for Sun features """
2-
from xarray.ufuncs import isinf, isnan
2+
import logging
33

44
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
55

6+
logger = logging.getLogger(__name__)
7+
68

79
class Sun(DataSourceOutput):
810
""" Class to store Sun data as a xr.Dataset with some validation """
@@ -13,22 +15,21 @@ class Sun(DataSourceOutput):
1315
@classmethod
1416
def model_validation(cls, v):
1517
""" Check that all values are non NaNs """
16-
assert (~isnan(v.elevation)).all(), "Some elevation data values are NaNs"
17-
assert (~isinf(v.elevation)).all(), "Some elevation data values are Infinite"
18-
19-
assert (~isnan(v.azimuth)).all(), "Some azimuth data values are NaNs"
20-
assert (~isinf(v.azimuth)).all(), "Some azimuth data values are Infinite"
21-
22-
assert (0 <= v.azimuth).all(), f"Some azimuth data values are lower 0, {v.azimuth.min()}"
23-
assert (
24-
v.azimuth <= 360
25-
).all(), f"Some azimuth data values are greater than 360, {v.azimuth.max()}"
26-
27-
assert (
28-
-90 <= v.elevation
29-
).all(), f"Some elevation data values are lower -90, {v.elevation.min()}"
30-
assert (
31-
v.elevation <= 90
32-
).all(), f"Some elevation data values are greater than 90, {v.elevation.max()}"
18+
v.check_nan_and_inf(data=v.elevation, variable_name="elevation")
19+
v.check_nan_and_inf(data=v.azimuth, variable_name="azimuth")
20+
21+
v.check_dataset_greater_than_or_equal_to(
22+
data=v.azimuth, variable_name="azimuth", min_value=0
23+
)
24+
v.check_dataset_less_than_or_equal_to(
25+
data=v.azimuth, variable_name="azimuth", max_value=360
26+
)
27+
28+
v.check_dataset_greater_than_or_equal_to(
29+
data=v.elevation, variable_name="elevation", min_value=-90
30+
)
31+
v.check_dataset_less_than_or_equal_to(
32+
data=v.elevation, variable_name="elevation", max_value=90
33+
)
3334

3435
return v

0 commit comments

Comments
 (0)