Skip to content

Commit fe35c4f

Browse files
committed
Save progress - Prototype url search
1 parent cf437bd commit fe35c4f

File tree

5 files changed

+294
-153
lines changed

5 files changed

+294
-153
lines changed

climateset/download/cmip6_downloader.py

Lines changed: 12 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
1-
from pyesgf.search import SearchConnection
2-
31
from climateset.download.abstract_downloader import AbstractDownloader
42
from climateset.download.constants.esgf import CMIP6
53
from climateset.download.downloader_config import (
64
CMIP6DownloaderConfig,
75
create_cmip6_downloader_config_from_file,
86
)
9-
from climateset.download.utils import (
10-
download_model_variable,
11-
get_upload_version,
12-
handle_base_search_constraints,
13-
)
7+
from climateset.download.utils import search_and_download_esgf_model_single_var
148
from climateset.utils import create_logger
159

1610
LOGGER = create_logger(__name__)
@@ -71,78 +65,19 @@ def download_from_model_single_var(
7165
preferred_version: data upload version; if 'latest', the newest version is always selected
7266
default_grid_label: default gridding method in which the data is provided
7367
"""
74-
conn = SearchConnection(url=self.config.node_link, distrib=False)
75-
76-
facets = (
77-
"project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, "
78-
"version, grid_label, experiment_id"
79-
)
80-
81-
self.logger.info("Using download_from_model_single_var() function")
82-
83-
ctx = conn.new_context(
84-
project=project,
85-
experiment_id=experiment,
86-
source_id=model,
68+
results_list = search_and_download_esgf_model_single_var(
69+
model=model,
8770
variable=variable,
88-
facets=facets,
71+
experiment=experiment,
72+
project=project,
73+
default_frequency=default_frequency,
74+
default_grid_label=default_grid_label,
75+
preferred_version=preferred_version,
76+
ensemble_members=self.config.ensemble_members,
77+
max_ensemble_members=self.config.max_ensemble_members,
78+
base_path=self.config.data_dir,
8979
)
90-
91-
ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label)
92-
93-
variants = list(ctx.facet_counts["variant_label"])
94-
95-
if len(variants) < 1:
96-
self.logger.info(
97-
"No items were found for this request. Please check on the esgf server if the combination of your "
98-
"model/scenarios/variables exists."
99-
)
100-
raise ValueError(
101-
f"Downloader did not find any items on esgf for your request with: Project {project}, "
102-
f"Experiment {experiment}, Model {model}, Variable {variable}."
103-
)
104-
105-
self.logger.info(f"Available variants : {variants}\n")
106-
self.logger.info(f"Length : {len(variants)}")
107-
108-
# TODO refactor logic of if/else
109-
if not self.config.ensemble_members:
110-
if self.config.max_ensemble_members > len(variants):
111-
self.logger.info("Less ensemble members available than maximum number desired. Including all variants.")
112-
ensemble_member_final_list = variants
113-
else:
114-
self.logger.info(
115-
f"{len(variants)} ensemble members available than desired (max {self.config.max_ensemble_members}. "
116-
f"Choosing only the first {self.config.max_ensemble_members}.)."
117-
)
118-
ensemble_member_final_list = variants[: self.config.max_ensemble_members]
119-
else:
120-
self.logger.info(f"Desired list of ensemble members given: {self.config.ensemble_members}")
121-
ensemble_member_final_list = list(set(variants) & set(self.config.ensemble_members))
122-
if len(ensemble_member_final_list) == 0:
123-
self.logger.info("WARNING: no overlap between available and desired ensemble members!")
124-
self.logger.info("Skipping.")
125-
return
126-
127-
for ensemble_member in ensemble_member_final_list:
128-
self.logger.info(f"Ensembles member: {ensemble_member}")
129-
ctx_ensemble = ctx.constrain(variant_label=ensemble_member)
130-
131-
version = get_upload_version(context=ctx, preferred_version=preferred_version)
132-
if version:
133-
ctx_ensemble = ctx_ensemble.constrain(version=version)
134-
135-
results = ctx_ensemble.search()
136-
137-
self.logger.info(f"Result len {len(results)}")
138-
139-
download_model_variable(
140-
project=CMIP6,
141-
model_id=model,
142-
search_results=results,
143-
variable=variable,
144-
base_path=self.config.data_dir,
145-
)
80+
self.logger.info(f"Download results: {results_list}")
14681

14782

14883
def cmip6_download_from_config(config):
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
NODE_LINK_URLS = [
2+
"https://esgf-node.llnl.gov/esg-search",
3+
"https://esgf.ceda.ac.uk/esg-search",
4+
"https://esgf-data.dkrz.de/esg-search",
5+
"https://esgf-node.ipsl.upmc.fr/esg-search",
6+
"https://esg-dn1.nsc.liu.se/esg-search",
7+
"https://esgf.nci.org.au/esg-search",
8+
"https://esgf.nccs.nasa.gov/esg-search",
9+
"https://esgdata.gfdl.noaa.gov/esg-search",
10+
]

climateset/download/input4mips_downloader.py

Lines changed: 15 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
1-
from pyesgf.search import SearchConnection
2-
31
from climateset.download.abstract_downloader import AbstractDownloader
42
from climateset.download.constants.esgf import INPUT4MIPS
53
from climateset.download.downloader_config import (
64
Input4mipsDownloaderConfig,
75
create_input4mips_downloader_config_from_file,
86
)
97
from climateset.download.utils import (
10-
download_metadata_variable,
11-
download_raw_input_variable,
12-
get_upload_version,
13-
handle_base_search_constraints,
8+
search_and_download_esgf_biomass_single_var,
9+
search_and_download_esgf_raw_single_var,
1410
)
1511
from climateset.utils import create_logger
1612

@@ -67,38 +63,18 @@ def download_raw_input_single_var(
6763
"""
6864
self.logger.info("Using download_raw_input_single_var() function")
6965

70-
facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label"
71-
7266
# Search context is sensitive to order and sequence, which is why
7367
# it's done in different steps instead of putting everything in `new_context`
74-
conn = SearchConnection(url=self.config.node_link, distrib=False)
75-
ctx = conn.new_context(
76-
project=project,
68+
results_list = search_and_download_esgf_raw_single_var(
7769
variable=variable,
70+
project=project,
7871
institution_id=institution_id,
79-
facets=facets,
72+
default_grid_label=default_grid_label,
73+
default_frequency=default_frequency,
74+
preferred_version=preferred_version,
75+
data_dir=self.config.data_dir,
8076
)
81-
ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label)
82-
83-
mips_targets = list(ctx.facet_counts["target_mip"])
84-
self.logger.info(f"Available target mips: {mips_targets}")
85-
86-
for target in mips_targets:
87-
ctx_target = ctx.constrain(target_mip=target)
88-
version = get_upload_version(context=ctx_target, preferred_version=preferred_version)
89-
if version:
90-
ctx_target = ctx_target.constrain(version=version)
91-
92-
results = ctx_target.search()
93-
self.logger.info(f"Result len {len(results)}")
94-
if len(results) > 0:
95-
download_raw_input_variable(
96-
project=INPUT4MIPS,
97-
institution_id=institution_id,
98-
search_results=results,
99-
variable=variable,
100-
base_path=self.config.data_dir,
101-
)
77+
self.logger.info(f"Download results: {results_list}")
10278

10379
def download_meta_historic_biomassburning_single_var(
10480
self,
@@ -123,38 +99,20 @@ def download_meta_historic_biomassburning_single_var(
12399
variable_id = variable.replace("_", "-")
124100
variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}"
125101
self.logger.info(variable, variable_id, institution_id)
126-
facets = "nominal_resolution,version"
127102

128103
# Search context is sensitive to order and sequence, which is why
129104
# it's done in different steps instead of putting everything in `new_context`
130-
conn = SearchConnection(url=self.config.node_link, distrib=False)
131-
ctx = conn.new_context(
132-
project=project,
105+
results = search_and_download_esgf_biomass_single_var(
133106
variable=variable_search,
134107
variable_id=variable_id,
108+
project=project,
135109
institution_id=institution_id,
136-
target_mip="CMIP",
137-
facets=facets,
138-
)
139-
ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label)
140-
141-
version = get_upload_version(context=ctx, preferred_version=preferred_version)
142-
if version:
143-
ctx = ctx.constrain(version=version)
144-
145-
results = ctx.search()
146-
self.logger.info(f"Result len {len(results)}")
147-
148-
result_list = [r.file_context().search() for r in results]
149-
self.logger.info(f"List of results :\n{result_list}")
150-
151-
download_metadata_variable(
152-
project=INPUT4MIPS,
153-
institution_id=institution_id,
154-
search_results=results,
155-
variable=variable,
110+
default_grid_label=default_grid_label,
111+
default_frequency=default_frequency,
112+
preferred_version=preferred_version,
156113
base_path=self.config.data_dir,
157114
)
115+
self.logger.info(f"Download results: {results}")
158116

159117

160118
def input4mips_download_from_config(config):

0 commit comments

Comments
 (0)