Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit bb03b4f

Browse files
committed
Pull more variables from pvlive
1 parent 80d8857 commit bb03b4f

File tree

3 files changed

+49
-41
lines changed

3 files changed

+49
-41
lines changed

nowcasting_dataset/config/gcp.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ input_data:
88
#---------------------- GSP -------------------
99
gsp:
1010
forecast_minutes: 60
11-
gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v3/pv_gsp.zarr
11+
gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
1212
history_minutes: 60
1313

1414
#---------------------- NWP -------------------

nowcasting_dataset/data_sources/gsp/pvlive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def load_pv_gsp_raw_data_from_pvlive(
8787
# limit the total number of concurrent tasks to be 1, so that we don't hit the pvlive api
8888
# too much
8989
future_tasks = []
90-
with futures.ThreadPoolExecutor(max_workers=4) as executor:
90+
with futures.ThreadPoolExecutor(max_workers=1) as executor:
9191
for gsp_id in gsp_ids:
9292

9393
# set the first chunk start and end times

scripts/generate_raw_data/get_raw_pv_gsp_data.py

Lines changed: 47 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
logging.getLogger().setLevel(logging.DEBUG)
3030
logging.getLogger("urllib3").setLevel(logging.WARNING)
3131

32-
config_filename = Pathy(nowcasting_dataset.__file__).parent / "config" / "gcp.yaml"
32+
config_filename = Pathy.fluid(nowcasting_dataset.__file__).parent / "config" / "gcp.yaml"
3333
config = load_yaml_configuration(config_filename)
3434

3535
start = datetime(2016, 1, 1, tzinfo=pytz.utc)
@@ -42,42 +42,50 @@
4242
LOCAL_TEMP_PATH = Path("~/temp/").expanduser()
4343
delete_all_files_in_temp_path(path=LOCAL_TEMP_PATH)
4444

45-
# get data
46-
data_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, normalize_data=False)
4745

48-
# pivot to index as datetime_gmt, and columns as gsp_id
49-
data_generation = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="generation_mw")
50-
data_generation_xarray = xr.DataArray(
51-
data_generation, name="generation_mw", dims=["datetime_gmt", "gsp_id"]
52-
)
53-
54-
data_capacity = data_df.pivot(
55-
index="datetime_gmt", columns="gsp_id", values="installedcapacity_mwp"
56-
)
57-
data_capacity_xarray = xr.DataArray(
58-
data_capacity, name="installedcapacity_mwp", dims=["datetime_gmt", "gsp_id"]
59-
)
60-
61-
data_xarray = xr.merge([data_generation_xarray, data_capacity_xarray])
62-
63-
# save config to file
64-
with open(os.path.join(LOCAL_TEMP_PATH, "configuration.yaml"), "w+") as f:
65-
yaml.dump(config_gsp, f, allow_unicode=True)
66-
67-
# Make encoding
68-
encoding = {
69-
var: {"compressor": numcodecs.Blosc(cname="zstd", clevel=5)} for var in data_xarray.data_vars
70-
}
71-
72-
# save data to file
73-
data_xarray.to_zarr(os.path.join(LOCAL_TEMP_PATH, "pv_gsp.zarr"), mode="w", encoding=encoding)
74-
75-
# upload to gcp
76-
upload_and_delete_local_files(dst_path=gcp_path, local_path=LOCAL_TEMP_PATH)
77-
78-
79-
# # code to change 'generation_mw' to 'generation_normalised'
80-
# data_xarray = xr.open_dataset(gcp_path + '/pv_gsp.zarr', engine="zarr")
81-
# data_xarray.__setitem__('gsp_id', [int(gsp_id) for gsp_id in data_xarray.gsp_id])
82-
# data_xarray = data_xarray.rename({"generation_mw": "generation_normalised"})
83-
# data_xarray.to_zarr(gcp_path + '/pv_gsp.zarr', mode="w", encoding=encoding)
46+
def fetch_data():
    """Download raw GSP PV data from PVLive, save it locally and upload it.

    Steps:
      1. Load the raw (``normalize_data=False``) PVLive data between the
         module-level ``start`` and ``end`` datetimes.
      2. Pivot each variable into a table indexed by ``datetime_gmt`` with one
         column per ``gsp_id``, and combine the tables into one ``xr.Dataset``.
      3. Write ``configuration.yaml`` and ``pv_gsp.zarr`` into
         ``LOCAL_TEMP_PATH``.
      4. Hand ``LOCAL_TEMP_PATH`` to ``upload_and_delete_local_files`` with
         ``gcp_path`` as the destination.

    Relies on the module-level names ``start``, ``end``, ``config_gsp``,
    ``gcp_path`` and ``LOCAL_TEMP_PATH``. Returns nothing.
    """
    dims = ("datetime_gmt", "gsp_id")
    # Columns of the PVLive dataframe that become variables in the dataset.
    variable_names = (
        "generation_mw",
        "installedcapacity_mwp",
        "capacity_mwp",
        "updated_gmt",
    )

    # get data
    data_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, normalize_data=False)

    # pivot to index as datetime_gmt, and columns as gsp_id (one table per variable)
    pivoted = {
        name: data_df.pivot(index="datetime_gmt", columns="gsp_id", values=name)
        for name in variable_names
    }

    # The coordinates are taken from the generation table, as before.
    reference_df = pivoted["generation_mw"]
    data_xarray = xr.Dataset(
        data_vars={name: (dims, table) for name, table in pivoted.items()},
        coords={
            "datetime_gmt": reference_df.index,
            "gsp_id": reference_df.columns,
        },
    )

    # save config to file
    # allow_unicode=True lets yaml emit raw non-ASCII characters, so the file is
    # opened as UTF-8 explicitly instead of relying on the platform default.
    config_path = os.path.join(LOCAL_TEMP_PATH, "configuration.yaml")
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(config_gsp, f, allow_unicode=True)

    # Make encoding: the same Blosc/zstd compressor for every data variable
    encoding = {
        var: {"compressor": numcodecs.Blosc(cname="zstd", clevel=5)}
        for var in data_xarray.data_vars
    }

    # save data to file
    data_xarray.to_zarr(os.path.join(LOCAL_TEMP_PATH, "pv_gsp.zarr"), mode="w", encoding=encoding)

    # upload to gcp
    upload_and_delete_local_files(dst_path=gcp_path, local_path=LOCAL_TEMP_PATH)
82+
83+
# # code to change 'generation_mw' to 'generation_normalised'
84+
# data_xarray = xr.open_dataset(gcp_path + '/pv_gsp.zarr', engine="zarr")
85+
# data_xarray.__setitem__('gsp_id', [int(gsp_id) for gsp_id in data_xarray.gsp_id])
86+
# data_xarray = data_xarray.rename({"generation_mw": "generation_normalised"})
87+
# data_xarray.to_zarr(gcp_path + '/pv_gsp.zarr', mode="w", encoding=encoding)
88+
89+
90+
if __name__ == "__main__":
91+
fetch_data()

0 commit comments

Comments
 (0)