Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 68f66c2

Browse files
Merge pull request #676 from openclimatefix/issue/675-sun-data-on-the-fly
add option to load sun data on the fly
2 parents b74b985 + 10a78a8 commit 68f66c2

File tree

3 files changed

+82
-27
lines changed

3 files changed

+82
-27
lines changed

nowcasting_dataset/config/model.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,15 @@ class Sun(DataSourceMixin):
483483
"gs://solar-pv-nowcasting-data/Sun/v1/sun.zarr/",
484484
description="Path to the Sun data source i.e Azimuth and Elevation",
485485
)
486+
load_live: bool = Field(
487+
False, description="Option to load sun data on the fly, rather than from file"
488+
)
489+
490+
elevation_limit: int = Field(
491+
10,
492+
description="The limit to the elevations for examples. "
493+
"Datetimes below this limits will be ignored",
494+
)
486495

487496

488497
class InputData(Base):

nowcasting_dataset/data_sources/sun/sun_data_source.py

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
""" Loading Raw data """
22
import logging
33
from dataclasses import dataclass
4+
from datetime import datetime
45
from numbers import Number
56
from pathlib import Path
67
from typing import List, Tuple, Union
@@ -14,7 +15,7 @@
1415
from nowcasting_dataset.data_sources.metadata.metadata_model import SpaceTimeLocation
1516
from nowcasting_dataset.data_sources.sun.raw_data_load_save import load_from_zarr, x_y_to_name
1617
from nowcasting_dataset.data_sources.sun.sun_model import Sun
17-
from nowcasting_dataset.geospatial import calculate_azimuth_and_elevation_angle
18+
from nowcasting_dataset.geospatial import calculate_azimuth_and_elevation_angle, osgb_to_lat_lon
1819

1920
logger = logging.getLogger(__name__)
2021

@@ -24,6 +25,8 @@ class SunDataSource(DataSource):
2425
"""Add azimuth and elevation angles of the sun."""
2526

2627
zarr_path: Union[str, Path]
28+
load_live: bool = False
29+
elevation_limit: int = 10
2730

2831
def __post_init__(self):
2932
"""Post Init"""
@@ -37,7 +40,8 @@ def get_data_model_for_batch():
3740

3841
def check_input_paths_exist(self) -> None:
3942
"""Check input paths exist. If not, raise a FileNotFoundError."""
40-
nd_fs_utils.check_path_exists(self.zarr_path)
43+
if not self.load_live:
44+
nd_fs_utils.check_path_exists(self.zarr_path)
4145

4246
def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
4347
"""
@@ -64,26 +68,40 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
6468
start_dt = self._get_start_dt(t0_datetime_utc)
6569
end_dt = self._get_end_dt(t0_datetime_utc)
6670

67-
# The names of the columns get truncated when saving, therefore we need to look for the
68-
# name of the columns near the location we are looking for
69-
locations = np.array(
70-
[[float(z.split(",")[0]), float(z.split(",")[1])] for z in self.azimuth.columns]
71-
)
72-
location = locations[
73-
np.isclose(locations[:, 0], x_center_osgb) & np.isclose(locations[:, 1], y_center_osgb)
74-
]
75-
# lets make sure there is atleast one
76-
assert len(location) > 0
77-
# Take the first location, and x and y coordinates are the first and center entries in
78-
# this array.
79-
location = location[0]
80-
# make name of column to pull data from. The columns name will be about
81-
# something like '22222.555,3333.6666'
82-
name = x_y_to_name(x=location[0], y=location[1])
83-
84-
del x_center_osgb, y_center_osgb
85-
azimuth = self.azimuth.loc[start_dt:end_dt][name]
86-
elevation = self.elevation.loc[start_dt:end_dt][name]
71+
if not self.load_live:
72+
73+
# The names of the columns get truncated when saving, therefore we need to look for the
74+
# name of the columns near the location we are looking for
75+
locations = np.array(
76+
[[float(z.split(",")[0]), float(z.split(",")[1])] for z in self.azimuth.columns]
77+
)
78+
location = locations[
79+
np.isclose(locations[:, 0], x_center_osgb)
80+
& np.isclose(locations[:, 1], y_center_osgb)
81+
]
82+
# let's make sure there is at least one
83+
assert len(location) > 0
84+
# Take the first location; its x and y coordinates are the first and second entries in
85+
# this array.
86+
location = location[0]
87+
# make name of column to pull data from. The column name will be
88+
# something like '22222.555,3333.6666'
89+
name = x_y_to_name(x=location[0], y=location[1])
90+
91+
del x_center_osgb, y_center_osgb
92+
azimuth = self.azimuth.loc[start_dt:end_dt][name]
93+
elevation = self.elevation.loc[start_dt:end_dt][name]
94+
95+
else:
96+
97+
latitude, longitude = osgb_to_lat_lon(x=x_center_osgb, y=y_center_osgb)
98+
99+
datestamps = pd.date_range(start=start_dt, end=end_dt, freq="5T").tolist()
100+
azimuth_elevation = calculate_azimuth_and_elevation_angle(
101+
latitude=latitude, longitude=longitude, datestamps=datestamps
102+
)
103+
azimuth = azimuth_elevation["azimuth"]
104+
elevation = azimuth_elevation["elevation"]
87105

88106
azimuth = azimuth.to_xarray().rename({"index": "time"})
89107
elevation = elevation.to_xarray().rename({"index": "time"})
@@ -97,7 +115,8 @@ def _load(self):
97115

98116
logger.info(f"Loading Sun data from {self.zarr_path}")
99117

100-
self.azimuth, self.elevation = load_from_zarr(zarr_path=self.zarr_path)
118+
if not self.load_live:
119+
self.azimuth, self.elevation = load_from_zarr(zarr_path=self.zarr_path)
101120

102121
def get_locations(
103122
self, t0_datetimes_utc: pd.DatetimeIndex
@@ -112,13 +131,20 @@ def datetime_index(self) -> pd.DatetimeIndex:
112131
latitude = 51
113132
longitude = 0
114133

134+
if not self.load_live:
135+
datestamps = self.elevation.index
136+
else:
137+
datestamps = pd.date_range(
138+
datetime(2019, 1, 1), datetime(2019, 12, 31, 23, 55), freq="5T"
139+
)
140+
115141
# get elevation for all datetimes
116142
azimuth_elevation = calculate_azimuth_and_elevation_angle(
117-
latitude=latitude, longitude=longitude, datestamps=self.elevation.index
143+
latitude=latitude, longitude=longitude, datestamps=datestamps
118144
)
119145

120146
# only select elevations >= self.elevation_limit (default 10)
121-
mask = azimuth_elevation["elevation"] >= 10
147+
mask = azimuth_elevation["elevation"] >= self.elevation_limit
122148

123149
# create warnings, so we know how many datetimes will be dropped.
124150
# Should be slightly more than half as it's night time 50% of the time
@@ -128,7 +154,7 @@ def datetime_index(self) -> pd.DatetimeIndex:
128154
f"out of {len(azimuth_elevation)} as elevation is < 10"
129155
)
130156

131-
datetimes = self.elevation[mask].index
157+
datetimes = datestamps[mask]
132158

133159
# Sun data is only for 2019, so we expand on these by
134160
# repeating data from 2014 to 2023

tests/data_sources/sun/test_sun_data_source.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
def test_init(test_data_folder): # noqa 103
99
zarr_path = test_data_folder + "/sun/test.zarr"
1010

11-
_ = SunDataSource(zarr_path=zarr_path, history_minutes=30, forecast_minutes=60)
11+
sun = SunDataSource(zarr_path=zarr_path, history_minutes=30, forecast_minutes=60)
12+
_ = sun.datetime_index()
1213

1314

1415
def test_get_example(test_data_folder): # noqa 103
@@ -43,3 +44,22 @@ def test_get_example_different_year(test_data_folder): # noqa 103
4344

4445
assert len(example.elevation) == 19
4546
assert len(example.azimuth) == 19
47+
48+
49+
def test_get_load_live(): # noqa 103
50+
51+
sun_data_source = SunDataSource(
52+
zarr_path="", history_minutes=30, forecast_minutes=60, load_live=True
53+
)
54+
_ = sun_data_source.datetime_index()
55+
56+
x = 256895.63164759654
57+
y = 666180.3018829626
58+
start_dt = pd.Timestamp("2021-04-01 12:00:00.000")
59+
60+
example = sun_data_source.get_example(
61+
location=SpaceTimeLocation(t0_datetime_utc=start_dt, x_center_osgb=x, y_center_osgb=y)
62+
)
63+
64+
assert len(example.elevation) == 19
65+
assert len(example.azimuth) == 19

0 commit comments

Comments
 (0)