Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit df8b3da

Browse files
Merge pull request #364 from openclimatefix/issue/fake-data-update
make pv coords more realistic
2 parents 1dd417a + 955c5c3 commit df8b3da

File tree

2 files changed

+94
-19
lines changed

2 files changed

+94
-19
lines changed

nowcasting_dataset/data_sources/fake.py

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
convert_coordinates_to_indexes_for_list_datasets,
2222
join_list_dataset_to_batch_dataset,
2323
)
24+
from nowcasting_dataset.geospatial import lat_lon_to_osgb
2425

2526

2627
def gsp_fake(
@@ -196,32 +197,84 @@ def topographic_fake(batch_size, image_size_pixels):
196197
return Topographic(xr_dataset)
197198

198199

200+
def add_uk_centroid_osgb(x, y):
    """
    Shift coordinates so that they are centred on a random point in the UK

    The inputs are re-centred to have zero mean, and are then offset by the
    OSGB coordinates of a latitude / longitude pair drawn uniformly from
    lat 51 to 55 and lon -2.5 to 1.

    Args:
        x: random values, OSGB (must provide ``.mean()``, e.g. a numpy array)
        y: random values, OSGB (must provide ``.mean()``, e.g. a numpy array)

    Returns: X,Y random coordinates [OSGB]
    """

    # pick the random centre; latitude is drawn before longitude
    random_lat = np.random.uniform(51, 55)
    random_lon = np.random.uniform(-2.5, 1)
    x_center, y_center = lat_lon_to_osgb(lat=random_lat, lon=random_lon)

    # remove the mean first, then move onto the UK centre
    x_in_uk = (x - x.mean()) + x_center
    y_in_uk = (y - y.mean()) + y_center

    return x_in_uk, y_in_uk
225+
226+
227+
def create_random_point_coordinates_osgb(size: int):
    """Make random coords [OSGB] for pv site, or gsp"""
    # raw offsets are integers drawn from [0, 10 ** 5), this is about 100KM
    upper_bound = 100_000

    # two independent draws of `size` integers; x is drawn before y
    x, y = (np.random.randint(0, upper_bound, size) for _ in range(2))

    # re-centre the points on a random location in the UK
    return add_uk_centroid_osgb(x, y)
235+
236+
237+
def make_random_image_coords_osgb(size: int):
    """Make random coords for image. These are ranges for the pixels"""

    # 4 kilometer spacing seemed about right for real satellite images
    pixel_spacing = 4 * 10 ** 3

    # evenly spaced offsets 0, spacing, 2 * spacing, ... one per pixel;
    # x and y use the same grid but are built as two separate arrays
    pixel_index = np.array(range(size))
    x = pixel_spacing * pixel_index
    y = pixel_spacing * pixel_index

    # re-centre the grid on a random location in the UK
    return add_uk_centroid_osgb(x, y)
247+
248+
199249
def create_image_array(
200250
dims=("time", "x", "y", "channels"),
201251
seq_length_5=19,
202252
image_size_pixels=64,
203253
channels=SAT_VARIABLE_NAMES,
204254
):
205255
"""Create Satellite or NWP fake image data"""
256+
257+
x, y = make_random_image_coords_osgb(size=image_size_pixels)
258+
206259
ALL_COORDS = {
207260
"time": pd.date_range("2021-01-01", freq="5T", periods=seq_length_5),
208-
"x": np.random.randint(low=0, high=1000, size=image_size_pixels),
209-
"y": np.random.randint(low=0, high=1000, size=image_size_pixels),
261+
"x": x,
262+
"y": y,
210263
"channels": np.array(channels),
211264
}
212265
coords = [(dim, ALL_COORDS[dim]) for dim in dims]
213266
image_data_array = xr.DataArray(
214-
abs(
215-
np.random.randn(
216-
seq_length_5,
217-
image_size_pixels,
218-
image_size_pixels,
219-
len(channels),
267+
abs( # to make sure average is about 100
268+
np.random.uniform(
269+
0,
270+
200,
271+
size=(seq_length_5, image_size_pixels, image_size_pixels, len(channels)),
220272
)
221273
),
222274
coords=coords,
223275
name="data",
224276
) # Fake data for testing!
277+
225278
return image_data_array
226279

227280

@@ -252,11 +305,24 @@ def create_gsp_pv_dataset(
252305
"id": np.random.choice(range(1000), number_of_systems, replace=False),
253306
}
254307
coords = [(dim, ALL_COORDS[dim]) for dim in dims]
308+
309+
# make pv yield
310+
data = np.random.randn(
311+
seq_length,
312+
number_of_systems,
313+
)
314+
data = data.clip(min=0)
315+
316+
# smooth the data, the convolution method smooths the data across systems first,
317+
# and then a bit across time (depending what you set N)
318+
N = int(seq_length / 2)
319+
data = np.convolve(data.ravel(), np.ones(N) / N, mode="same").reshape(
320+
(seq_length, number_of_systems)
321+
)
322+
323+
# make into a Data Array
255324
data_array = xr.DataArray(
256-
np.random.randn(
257-
seq_length,
258-
number_of_systems,
259-
),
325+
data,
260326
coords=coords,
261327
) # Fake data for testing!
262328

@@ -267,20 +333,23 @@ def create_gsp_pv_dataset(
267333

268334
data = data_array.to_dataset(name="power_mw")
269335

336+
# make random coords
337+
x, y = create_random_point_coordinates_osgb(size=number_of_systems)
338+
270339
x_coords = xr.DataArray(
271-
data=np.sort(
272-
np.random.choice(range(2 * number_of_systems), number_of_systems, replace=False)
273-
),
340+
data=x,
274341
dims=["id"],
275342
)
276343

277344
y_coords = xr.DataArray(
278-
data=np.sort(
279-
np.random.choice(range(2 * number_of_systems), number_of_systems, replace=False)
280-
),
345+
data=y,
281346
dims=["id"],
282347
)
283348

349+
# make first coords centroid
350+
x_coords.data[0] = x_coords.data.mean()
351+
y_coords.data[0] = y_coords.data.mean()
352+
284353
data["capacity_mwp"] = capacity
285354
data["x_coords"] = x_coords
286355
data["y_coords"] = y_coords

nowcasting_dataset/data_sources/satellite/satellite_data_source.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def open(self) -> None:
4747
"""
4848
self._data = self._open_data()
4949
self._data = self._data.sel(variable=list(self.channels))
50+
if "variable" in self._data.dims:
51+
self._data = self._data.rename({"variable": "channels"})
5052

5153
def _open_data(self) -> xr.DataArray:
5254
return open_sat_data(zarr_path=self.zarr_path, consolidated=self.consolidated)
@@ -125,7 +127,9 @@ def get_example(
125127
y_center_osgb=y_meters_center,
126128
)
127129

128-
selected_data = selected_data.rename({"variable": "channels"})
130+
if "variable" in list(selected_data.dims):
131+
selected_data = selected_data.rename({"variable": "channels"})
132+
129133
selected_data = self._post_process_example(selected_data, t0_dt)
130134

131135
if selected_data.shape != self._shape_of_example:
@@ -281,6 +285,8 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
281285
)
282286

283287
data_array = dataset["stacked_eumetsat_data"]
288+
if "stacked_eumetsat_data" == data_array.name:
289+
data_array.name = "data"
284290
del dataset
285291

286292
# Flip coordinates to top-left first

0 commit comments

Comments
 (0)