Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 5d03c35

Browse files
authored
Merge pull request #283 from openclimatefix/jack/remove_load_configuration_from_gcs
remove load_configuration_from_gcs()
2 parents 6be367d + 27e3e83 commit 5d03c35

File tree

5 files changed

+17
-46
lines changed

5 files changed

+17
-46
lines changed

nowcasting_dataset/config/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ See `model.py` for documentation of the expected configuration fields.
99

1010
See either `gcp.yaml` or `on_premises.yaml` for example config files.
1111

12+
All paths must include the protocol prefix. For local files,
13+
it's sufficient to just start with a '/'. For AWS, start with 's3://',
14+
for GCP start with 'gs://'.
15+
16+
1217
# Example
1318

1419
```python

nowcasting_dataset/config/load.py

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
""" Loading configuration functions """
2-
import io
32
import logging
4-
import os
53
from typing import Union
64

75
import fsspec
8-
import gcsfs
96
import yaml
107
from pathy import Pathy
118

@@ -33,36 +30,3 @@ def load_yaml_configuration(filename: Union[str, Pathy]) -> Configuration:
3330
configuration = Configuration(**configuration)
3431

3532
return configuration
36-
37-
38-
def load_configuration_from_gcs(
39-
gcp_dir: str, bucket: str = "solar-pv-nowcasting-data", filename: str = "configuration.yaml"
40-
) -> Configuration:
41-
"""
42-
Load configuration from gcs
43-
44-
gcp_dir: the directory where the configuration is saved
45-
bucket: the gcs bucket to load from
46-
filename: the filename that will be loaded
47-
48-
Returns: configuration class
49-
"""
50-
logger.info("Loading configuration from gcs")
51-
52-
bucket_and_dir = os.path.join(f"gs://{bucket}", gcp_dir)
53-
filename = os.path.join(bucket_and_dir, filename)
54-
logger.debug(f"Will be opening {filename}")
55-
56-
# set up gcs
57-
gcs = gcsfs.GCSFileSystem(access="read_only")
58-
59-
# load the file into bytes
60-
with gcs.open(filename, mode="rb") as file:
61-
file_bytes = file.read()
62-
63-
# load the bytes to yaml
64-
with io.BytesIO(file_bytes) as file:
65-
data = yaml.load(file)
66-
67-
# put into pydantic class and return
68-
return Configuration(**data)

nowcasting_dataset/config/model.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
""" Configuration model for the dataset """
1+
""" Configuration model for the dataset
2+
3+
All paths must include the protocol prefix. For local files,
4+
it's sufficient to just start with a '/'. For AWS, start with 's3://',
5+
for GCP start with 'gs://'.
6+
"""
27
from datetime import datetime
38
from typing import Optional
49

@@ -152,11 +157,7 @@ class Sun(DataSourceMixin):
152157

153158
class InputData(BaseModel):
154159
"""
155-
Input data model
156-
157-
All paths must include the protocol prefix. For local files,
158-
it's sufficient to just start with a '/'. For aws, start with 's3://',
159-
for gcp start with 'gs://'.
160+
Input data model.
160161
"""
161162

162163
pv: PV = PV()

scripts/validate_ml_data.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import nowcasting_dataset
66
import torch
7-
from nowcasting_dataset.config.load import load_configuration_from_gcs, load_yaml_configuration
7+
from nowcasting_dataset.config.load import load_yaml_configuration
88
from nowcasting_dataset.dataset.datasets import NetCDFDataset, worker_init_fn
99
from nowcasting_dataset.dataset.validate import ValidatorDataset
1010
from nowcasting_dataset.cloud.utils import get_maximum_batch_id
@@ -17,7 +17,6 @@
1717

1818
# load configuration, this can be changed to a different filename as needed
1919
filename = os.path.join(os.path.dirname(nowcasting_dataset.__file__), "config", "gcp.yaml")
20-
config = load_configuration_from_gcs(gcp_dir="prepared_ML_training_data/v5/")
2120
config = load_yaml_configuration(filename=filename)
2221

2322
DST_NETCDF4_PATH = config.output_data.filepath

tests/config/test_config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
import nowcasting_dataset
10-
from nowcasting_dataset.config.load import load_yaml_configuration, load_configuration_from_gcs
10+
from nowcasting_dataset.config.load import load_yaml_configuration
1111
from nowcasting_dataset.config.model import Configuration, set_git_commit
1212
from nowcasting_dataset.config.save import save_yaml_configuration
1313

@@ -87,7 +87,9 @@ def test_load_to_gcs():
8787
"""
8888
Check that configuration can be loaded from gcs
8989
"""
90-
config = load_configuration_from_gcs(gcp_dir="prepared_ML_training_data/v-default")
90+
config = load_yaml_configuration(
91+
filename="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v-default/configuration.yaml"
92+
)
9193

9294
assert isinstance(config, Configuration)
9395

0 commit comments

Comments
 (0)