From ce5fb25f1715e8949c2e43a92abf28a5de0dacba Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Fri, 4 Oct 2024 16:46:12 +0200 Subject: [PATCH 01/74] fix of GPF graphs from pull request by Aleksandr Tulenkov --- ost/graphs/S1_GRD2ARD/3_LSmap.xml | 1 - ost/graphs/S1_GRD2ARD/3_ML_TC.xml | 1 - ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml | 1 - ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml | 1 - 4 files changed, 4 deletions(-) diff --git a/ost/graphs/S1_GRD2ARD/3_LSmap.xml b/ost/graphs/S1_GRD2ARD/3_LSmap.xml index 1ec47f53..284a7e4d 100755 --- a/ost/graphs/S1_GRD2ARD/3_LSmap.xml +++ b/ost/graphs/S1_GRD2ARD/3_LSmap.xml @@ -48,7 +48,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_GRD2ARD/3_ML_TC.xml b/ost/graphs/S1_GRD2ARD/3_ML_TC.xml index 39c21394..2ca9d487 100755 --- a/ost/graphs/S1_GRD2ARD/3_ML_TC.xml +++ b/ost/graphs/S1_GRD2ARD/3_ML_TC.xml @@ -39,7 +39,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml b/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml index 44bc70d8..e3f29736 100755 --- a/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml +++ b/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml @@ -39,7 +39,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml b/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml index cd711ddc..a0f7f97a 100755 --- a/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml +++ b/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml @@ -52,7 +52,6 @@ false false true - false false false false From 366b40b86e208902d0f10f48dab443d0e0c62d12 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Fri, 4 Oct 2024 08:27:40 +0200 Subject: [PATCH 02/74] make Dockerfile work on Ubuntu 20 --- Dockerfile | 22 ++++++++++++++++------ requirements.txt | 42 +++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 27 deletions(-) mode change 100755 => 100644 requirements.txt diff --git a/Dockerfile b/Dockerfile index 16004cb1..50e94995 100755 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,13 @@ ENV 
TBX="esa-snap_sentinel_unix_${TBX_VERSION}_${TBX_SUBVERSION}.sh" \ HOME=/home/ost \ PATH=$PATH:/home/ost/programs/snap/bin:/home/ost/programs/OTB-${OTB_VERSION}-Linux64/bin +RUN apt-get update && apt-get install -yq wget libquadmath0 + +RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/libgfortran3_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i libgfortran3_6.4.0-17ubuntu1_amd64.deb + # install all dependencies RUN groupadd -r ost && \ useradd -r -g ost ost && \ @@ -29,7 +36,6 @@ RUN groupadd -r ost && \ libgdal-dev \ python3-gdal \ libspatialindex-dev \ - libgfortran3 \ wget \ unzip \ imagemagick \ @@ -46,7 +52,7 @@ RUN alias python=python3 && \ rm $TBX && \ rm snap.varfile && \ cd /home/ost/programs && \ - wget https://www.orfeo-toolbox.org/packages/${OTB} && \ + wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} && \ chmod +x $OTB && \ ./${OTB} && \ rm -f OTB-${OTB_VERSION}-Linux64.run @@ -60,11 +66,15 @@ RUN /home/ost/programs/snap/bin/snap --nosplash --nogui --modules --update-all 2 # set usable memory to 12G RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions +COPY requirements.txt $HOME + # get OST and tutorials -RUN python3 -m pip install git+https://github.com/ESA-PhiLab/OpenSarToolkit.git && \ - git clone https://github.com/ESA-PhiLab/OST_Notebooks && \ - jupyter labextension install @jupyter-widgets/jupyterlab-manager && \ - jupyter nbextension enable --py widgetsnbextension +RUN python3 -m pip install git+https://github.com/ESA-PhiLab/OpenSarToolkit.git -c requirements.txt && \ + git clone https://github.com/ESA-PhiLab/OST_Notebooks + +#RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager +#RUN jupyter nbextension enable --py widgetsnbextension +RUN pip install widgetsnbextension EXPOSE 8888 CMD jupyter lab --ip='0.0.0.0' --port=8888 --no-browser 
--allow-root diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 50362e8c..f205cc5d --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,22 @@ -descartes -fiona -gdal>=2 -godale -pyproj>=2.1 -geopandas>=0.8 -jupyterlab -matplotlib -numpy -pandas -psycopg2-binary -rasterio -requests -scipy -shapely -tqdm -imageio -rtree -retrying -pytest +descartes==1.1.0 +fiona==1.9.6 +gdal==3.0.4 +godale==0.3 +pyproj==3.5.0 +geopandas==0.13.2 +jupyterlab==4.2.3 +matplotlib==3.7.5 +numpy==1.24.4 +pandas==1.5.3 +psycopg2-binary==2.9.9 +rasterio==1.3.10 +requests==2.32.3 +scipy==1.10.1 +shapely==2.0.5 +tqdm==4.66.4 +imageio==2.34.2 +rtree==1.3.0 +retrying==1.3.3 +pytest==8.2.2 pytest-cov -pytest-runner \ No newline at end of file +pytest-runner From c30ed836ce7e2b342d9311e82d814e0ddc0bbe5c Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Fri, 4 Oct 2024 08:28:30 +0200 Subject: [PATCH 03/74] add support for CDSE as replacement for SciHub --- ost/helpers/copernicus.py | 203 +++++++++++++++++++++++++++++++++++++- ost/s1/download.py | 12 ++- ost/s1/s1scene.py | 55 +++++++++-- 3 files changed, 258 insertions(+), 12 deletions(-) diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index fb729354..984a6563 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -2,11 +2,14 @@ import getpass import logging +import multiprocessing +import urllib from pathlib import Path from datetime import datetime as dt - +import tqdm import requests from shapely.wkt import loads +from ost.helpers import helpers as h logger = logging.getLogger(__name__) @@ -21,6 +24,32 @@ def ask_credentials(): return uname, pword +def connect(uname=None, pword=None, base_url="https://catalogue.dataspace.copernicus.eu"): + """Generates an opener for the Copernicus apihub/dhus + + :param uname: username of Copernicus' CDSE + :type uname: str + :param pword: password of Copernicus' CDSE + :type pword: str + :param base_url: + :return: an urllib 
opener instance for Copernicus' CDSE + :rtype: opener object + """ + + if not uname: + print(" If you do not have a CDSE user" " account go to: https://browser.dataspace.copernicus.eu") + uname = input(" Your CDSE Username:") + + if not pword: + pword = getpass.getpass(" Your CDSE Password:") + + # create opener + manager = urllib.request.HTTPPasswordMgrWithDefaultRealm() + manager.add_password(None, base_url, uname, pword) + handler = urllib.request.HTTPBasicAuthHandler(manager) + opener = urllib.request.build_opener(handler) + + return opener def get_access_token(username, password: None): @@ -176,3 +205,175 @@ def get_advanced_metadata(metafile, access_token): beginposition, endposition, acqdate, 0 # placeholder for size ) + + +def s1_download(uuid, filename, uname, pword, base_url="https://catalogue.dataspace.copernicus.eu"): + """Single scene download function for CDSE + + :param uuid: product's uuid + :param filename: local path for the download + :param uname: username of CDSE + :param pword: password of CDSE + :param base_url: + + :return: + """ + + # get out the arguments + if isinstance(filename, str): + filename = Path(filename) + + # check if file is partially downloaded + first_byte = filename.stat().st_size if filename.exists() else 0 + + # ask for credentials in case they are not defined as input + if not uname or not pword: + ask_credentials() + + # define url + url = f"{base_url}/odata/v1/Products({uuid})/$value" + + # get first response for file Size + access_token = get_access_token(uname, pword) + # we use some random url for checking (also for czech mirror) + with requests.Session() as session: + headers = {'Authorization': f'Bearer {access_token}', + "Range": f"bytes={first_byte}-"} + request = session.request("get", url) + response = session.get(request.url, headers=headers, stream=True) + + # check response + if response.status_code == 401: + raise ValueError(" ERROR: Username/Password are incorrect.") + elif response.status_code != 200: + 
print(" ERROR: Something went wrong, will try again in 30 seconds.") + response.raise_for_status() + + # get download size + remaining_length = int(response.headers.get("content-length", 0)) + print(f"{filename.name} {first_byte=} {remaining_length=}") + if remaining_length == 0: + return + + # define chunk_size + chunk_size = 8192 + + # actual download + with open(filename, "ab") as file: + for chunk in response.iter_content(chunk_size): + if chunk: + file.write(chunk) + #pbar.update(len(chunk)) + #print(f"reading {filename.name} {len(chunk)}") + else: + print(f"reading {filename.name} empty chunk") + print(f"{filename.name} downloaded, {filename.stat().st_size=}") + + logger.info(f"Checking zip archive {filename.name} for consistency") + zip_test = h.check_zipfile(filename) + + # if it did not pass the test, remove the file + # in the while loop it will be downloaded again + if zip_test is not None: + logger.info(f"{filename.name} did not pass the zip test. Re-downloading " f"the full scene.") + #filename.unlink() + #first_byte = 0 + raise ValueError(f"zip test failed for {filename.name}") + # otherwise we change the status to downloaded + logger.info(f"{filename.name} passed the zip test.") + with open(filename.with_suffix(".downloaded"), "w") as file: + file.write("successfully downloaded \n") + + +def s1_download_parallel(argument_list): + """Helper function for parallel download from scihub""" + + uuid, filename, uname, pword, base_url = argument_list + s1_download(uuid, filename, uname, pword, base_url) + + +def batch_download( + inventory_df, + download_dir, + uname, + pword, + concurrent=2, + base_url="https://catalogue.dataspace.copernicus.eu", +): + """Batch download Sentinel-1 on the basis of an OST inventory GeoDataFrame + + :param inventory_df: + :param download_dir: + :param uname: + :param pword: + :param concurrent: + :param base_url: + + :return: + """ + from ost import Sentinel1Scene as S1Scene + + if isinstance(download_dir, str): + download_dir 
= Path(download_dir) + + # create list of scenes + scenes = inventory_df["identifier"].tolist() + + check, i = False, 1 + while check is False and i <= 10: + + download_list = [] + for scene_id in scenes: + scene = S1Scene(scene_id) + filepath = scene.download_path(download_dir, True) + + try: + uuid = inventory_df["uuid"][inventory_df["identifier"] == scene_id].tolist() + except KeyError: + #uuid = [scene.scihub_uuid(connect(uname=uname, pword=pword, base_url=base_url))] + print("cannot find uuid in inventory " + str(inventory_df)) + raise + + if Path(f"{filepath}.downloaded").exists(): + logger.debug(f"{scene.scene_id} is already downloaded.") + else: + # create list objects for download + download_list.append([uuid[0], filepath, uname, pword, base_url]) + + if download_list: + pool = multiprocessing.Pool(processes=concurrent) + pool.map(s1_download_parallel, download_list) + + downloaded_scenes = list(download_dir.glob("**/*.downloaded")) + + if len(inventory_df["identifier"].tolist()) == len(downloaded_scenes): + logger.info("All products are downloaded.") + check = True + else: + check = False + for scene in scenes: + + scene = S1Scene(scene) + file_path = scene.download_path(download_dir) + + if file_path.with_suffix(".downloaded").exists(): + scenes.remove(scene.scene_id) + + i += 1 + + +def check_connection(uname, pword, base_url="https://catalogue.dataspace.copernicus.eu"): + """Check if a connection with CDSE can be established + :param uname: + :param pword: + :param base_url: + :return: + """ + access_token = get_access_token(uname, pword) + # we use some random url for checking (also for czech mirror) + url = f"{base_url}/odata/v1/Products(8f30a536-c01c-4ef4-ac74-be3378dc44c4)/$value" + with requests.Session() as session: + headers = {'Authorization': f'Bearer {access_token}'} + request = session.request("head", url) + response = session.get(request.url, headers=headers, stream=True) + return response.status_code diff --git a/ost/s1/download.py 
b/ost/s1/download.py index 9277f434..9fbef73e 100644 --- a/ost/s1/download.py +++ b/ost/s1/download.py @@ -15,7 +15,7 @@ from ost.s1.s1scene import Sentinel1Scene as S1Scene from ost.helpers import helpers as h -from ost.helpers import scihub, peps, asf, onda # , asf_wget +from ost.helpers import scihub, peps, asf, onda, copernicus # , asf_wget logger = logging.getLogger(__name__) @@ -90,9 +90,10 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un print(" (2) Alaska Satellite Facility (NASA, full archive)") print(" (3) PEPS (CNES, 1 year rolling archive)") print(" (4) ONDA DIAS (ONDA DIAS full archive for SLC -" " or GRD from 30 June 2019)") + print(" (5) CDSE") # print(' (5) Alaska Satellite Facility (using WGET - ' # 'unstable - use only if 2 does not work)') - mirror = input(" Type 1, 2, 3, or 4: ") + mirror = input(" Type 1, 2, 3, 4, or 5: ") if not uname: print(" Please provide username for the selected server") @@ -118,7 +119,8 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un error_code = peps.check_connection(uname, pword) elif int(mirror) == 4: error_code = onda.check_connection(uname, pword) - # elif int(mirror) == 5: + elif int(mirror) == 5: + error_code = copernicus.check_connection(uname, pword) # error_code = asf_wget.check_connection(uname, pword) # hidden option for downloading from czech mirror elif int(mirror) == 321: @@ -128,7 +130,7 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un if error_code == 401: raise ValueError("Username/Password are incorrect") - elif error_code != 200: + elif error_code != 200 and error_code != 301 and error_code != 404: raise ValueError(f"Some connection error. 
Error code {error_code}.") # download in parallel @@ -140,6 +142,8 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un peps.batch_download(inventory_df, download_dir, uname, pword, concurrent) elif int(mirror) == 4: # ONDA DIAS onda.batch_download(inventory_df, download_dir, uname, pword, concurrent) + elif int(mirror) == 5: # CDSE + copernicus.batch_download(inventory_df, download_dir, uname, pword, concurrent) if int(mirror) == 321: # scihub czech mirror scihub.batch_download( inventory_df, diff --git a/ost/s1/s1scene.py b/ost/s1/s1scene.py index dd63227f..630d7ab7 100644 --- a/ost/s1/s1scene.py +++ b/ost/s1/s1scene.py @@ -37,7 +37,7 @@ import pandas as pd import geopandas as gpd -from ost.helpers import scihub, peps, onda, asf, raster as ras, helpers as h +from ost.helpers import scihub, peps, onda, asf, raster as ras, helpers as h, copernicus from ost.helpers.settings import APIHUB_BASEURL, OST_ROOT from ost.helpers.settings import set_log_level, check_ard_parameters from ost.s1.grd_to_ard import grd_to_ard, ard_to_rgb @@ -244,7 +244,7 @@ def info_dict(self): return inf_dict - def download(self, download_dir, mirror=None): + def download(self, download_dir, mirror=None, uname=None, pword=None): if not mirror: logger.info("One or more of your scenes need to be downloaded.") @@ -253,9 +253,10 @@ def download(self, download_dir, mirror=None): print(" (2) Alaska Satellite Facility (NASA, full archive)") print(" (3) PEPS (CNES, 1 year rolling archive)") print(" (4) ONDA DIAS (ONDA DIAS full archive for" " SLC - or GRD from 30 June 2019)") + print(" (5) CDSE") # print(' (5) Alaska Satellite Facility (using WGET' # ' - unstable - use only if 2 fails)') - mirror = input(" Type 1, 2, 3, or 4: ") + mirror = input(" Type 1, 2, 3, 4, or 5: ") from ost.s1 import download @@ -263,16 +264,19 @@ def download(self, download_dir, mirror=None): download_dir = Path(download_dir) if mirror == "1": - uname, pword = scihub.ask_credentials() + if uname 
is None or pword is None: + uname, pword = scihub.ask_credentials() opener = scihub.connect(uname=uname, pword=pword) df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.scihub_uuid(opener)]}) elif mirror == "2": - uname, pword = asf.ask_credentials() + if uname is None or pword is None: + uname, pword = asf.ask_credentials() df = pd.DataFrame({"identifier": [self.scene_id]}) elif mirror == "3": - uname, pword = peps.ask_credentials() + if uname is None or pword is None: + uname, pword = peps.ask_credentials() df = pd.DataFrame( { "identifier": [self.scene_id], @@ -280,9 +284,16 @@ def download(self, download_dir, mirror=None): } ) elif mirror == "4": - uname, pword = onda.ask_credentials() + if uname is None or pword is None: + uname, pword = onda.ask_credentials() opener = onda.connect(uname=uname, pword=pword) df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.ondadias_uuid(opener)]}) + elif mirror == "5": + if uname is None or pword is None: + uname, pword = copernicus.ask_credentials() + opener = copernicus.connect(uname=uname, pword=pword) + df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.copernicus_uuid(opener)]}) + else: raise ValueError("You entered the wrong mirror.") # else: # ASF @@ -645,6 +656,36 @@ def safe_annotation_get(self, download_dir, data_mount=None): return gdf_final.drop_duplicates(["AnxTime"], keep="first") + def copernicus_uuid(self, opener): + + # construct the basic the url + base_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=" + + # request + action = urllib.parse.quote(f"Name eq '{self.scene_id}.SAFE'") + + # construct the download url + url = base_url + action + + try: + # get the request + req = opener.open(url) + except URLError as error: + if hasattr(error, "reason"): + logger.info(f"{CONNECTION_ERROR}{error.reason}") + sys.exit() + elif hasattr(error, "code"): + logger.info(f"{CONNECTION_ERROR_2}{error.reason}") + sys.exit() + else: + # write the request to to 
the response variable + # (i.e. the xml coming back from scihub) + response = req.read().decode("utf-8") + + # return uuid from response + # "Id":"1b64f9bb-2e8e-58ec-abac-45f4f5b61d22","Name":"S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.SAFE" + return response.split('"Id":"')[1].split('","Name":')[0] + # onda dias uuid extractor def ondadias_uuid(self, opener): From cc5fba931066884478a7e5b8a5d60a26d4734535 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Fri, 4 Oct 2024 08:30:48 +0200 Subject: [PATCH 04/74] ignore IDEA ide files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 10d04cff..f37d901f 100755 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,4 @@ dmypy.json # IDE .vscode +.idea From 508a5e698096c49b756285e564edac6167717dd4 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 10:17:03 +0200 Subject: [PATCH 05/74] add first application script for pre-processing --- ost/app/__init__.py | 0 ost/app/preprocessing.py | 121 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 ost/app/__init__.py create mode 100644 ost/app/preprocessing.py diff --git a/ost/app/__init__.py b/ost/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py new file mode 100644 index 00000000..74cea670 --- /dev/null +++ b/ost/app/preprocessing.py @@ -0,0 +1,121 @@ +import sys +from pathlib import Path +from pprint import pprint +from ost import Sentinel1Scene +import click + +#from ost.helpers.settings import set_log_level +#import logging +#set_log_level(logging.DEBUG) + +@click.command() +@click.argument("input") +@click.option("--ard-type", + type=click.Choice(['OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine']), + default='Earth Engine') +@click.option("--with-speckle-filter", + default=False) +@click.option("--resampling-method", + type=click.Choice(["BILINEAR_INTERPOLATION", 
"BICUBIC_INTERPOLATION"]), + default="BILINEAR_INTERPOLATION") +@click.option("--resolution", + default=100) +def run( + input: str, + resolution: int, + with_speckle_filter: bool, + resampling_method: str +): + # get home folder + #home = Path.home() + home = "." + + # create a processing directory + #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') + #output_dir.mkdir(parents=True, exist_ok=True) + #print(str(output_dir)) + output_dir = "." + + # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product + + #--------------------------------------------------- + # Some scenes to choose from + + # very first IW (VV/VH) S1 image available over Istanbul/Turkey + # NOTE:only available via ASF data mirror + #scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' + #scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' + scene_id = input[input.rfind("/")+1:input.rfind(".")] + + # other scenes with different scene types to process (uncomment) + # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen + # scene_id = 'S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85' + + # IW scene (single-polarised VV) over Ecuadorian Amazon + # scene_id = 'S1A_IW_GRDH_1SSV_20150205T232009_20150205T232034_004494_00583A_1C80' + + # EW scene (dual-polarised VV/VH) over Azores (needs a different DEM, see cell of ARD parameters below) + # scene_id = 'S1B_EW_GRDM_1SDV_20200303T193150_20200303T193250_020532_026E82_5CE9' + + # EW scene (dual-polarised HH/HV) over Greenland + # scene_id = 'S1B_EW_GRDM_1SDH_20200511T205319_20200511T205419_021539_028E4E_697E' + + # Stripmap mode S5 scene (dual-polarised VV/VH) over Germany + # scene_id = 'S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB' + #--------------------------------------------------- + + # create an S1Scene instance + s1 = Sentinel1Scene(scene_id) + + # print summarising infos about the 
scene + s1.info() + + s1.download(output_dir, mirror="5", uname='martin.boettcher@brockmann-consult.de', pword='...') + + # Template ARD parameters + + # we change ARD type + # possible choices are: + # 'OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine' + #s1.update_ard_parameters('Earth-Engine') + s1.update_ard_parameters(ard_type) + print('-----------------------------------------------------------------------------------------------------------') + print('Dictionary of Earth Engine ARD parameters:') + print('-----------------------------------------------------------------------------------------------------------') + pprint(s1.ard_parameters['single_ARD']) + print('-----------------------------------------------------------------------------------------------------------') + + # Customised ARD parameters + + # we cusomize the resolution and image resampling + s1.ard_parameters['single_ARD']['resolution'] = resolution # set output resolution to 100m + s1.ard_parameters['single_ARD']['remove_speckle'] = with_speckle_filter # apply a speckle filter + s1.ard_parameters['single_ARD']['dem']['image_resampling'] = resampling_method # BICUBIC_INTERPOLATION is default + + # s1.ard_parameters['single_ARD']['product_type'] = 'RTC-gamma0' + + # uncomment this for the Azores EW scene + # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' + print('-----------------------------------------------------------------------------------------------------------') + print('Dictionary of our customised ARD parameters for the final scene processing:') + print('-----------------------------------------------------------------------------------------------------------') + pprint(s1.ard_parameters['single_ARD']) + print('-----------------------------------------------------------------------------------------------------------') + + s1.create_ard( + infile=s1.get_path(output_dir), + out_dir=output_dir, + overwrite=True + ) + + print(' The path to our newly created ARD product can 
be obtained the following way:') + print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_dimap}") + + s1.create_rgb(outfile = output_dir.joinpath(f'{s1.start_date}.tif')) + + print(' The path to our newly created RGB product can be obtained the following way:') + print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") + +if __name__ == "__main__": + sys.exit(run()) + From cc708ecf59c3c52d11cf8cae4f4b1eb46298c41e Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 10:28:16 +0200 Subject: [PATCH 06/74] add first application script for pre-processing --- ost/app/preprocessing.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 74cea670..e8339920 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -2,14 +2,11 @@ from pathlib import Path from pprint import pprint from ost import Sentinel1Scene -import click - -#from ost.helpers.settings import set_log_level -#import logging -#set_log_level(logging.DEBUG) - @click.command() @click.argument("input") +@click.argument("output-dir") +@click.option("--resolution", + default=100) @click.option("--ard-type", type=click.Choice(['OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine']), default='Earth Engine') @@ -18,11 +15,11 @@ @click.option("--resampling-method", type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), default="BILINEAR_INTERPOLATION") -@click.option("--resolution", - default=100) def run( input: str, + output_dir: str, resolution: int, + ard_type: str, with_speckle_filter: bool, resampling_method: str ): @@ -34,7 +31,6 @@ def run( #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') #output_dir.mkdir(parents=True, exist_ok=True) #print(str(output_dir)) - output_dir = "." 
# create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product @@ -116,6 +112,12 @@ def run( print(' The path to our newly created RGB product can be obtained the following way:') print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") +#from ost.helpers.settings import set_log_level +#import logging +#set_log_level(logging.DEBUG) + +import click + if __name__ == "__main__": sys.exit(run()) From fb4451883cb755b0a3851e3da1af66466f25158d Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 11:06:34 +0200 Subject: [PATCH 07/74] add first application script for pre-processing --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f205cc5d..5892fc8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +click>=8.0 descartes==1.1.0 fiona==1.9.6 gdal==3.0.4 From ca4b547c2a774db38410eecd08db8532481fe9ed Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 11:16:07 +0200 Subject: [PATCH 08/74] add first application script for pre-processing --- ost/app/preprocessing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index e8339920..36b44fae 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -2,6 +2,8 @@ from pathlib import Path from pprint import pprint from ost import Sentinel1Scene +import click + @click.command() @click.argument("input") @click.argument("output-dir") From 98867cecfd7d3658f8cb3667d0143896e8a44c62 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 11:23:17 +0200 Subject: [PATCH 09/74] add first application script for pre-processing --- ost/app/preprocessing.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 36b44fae..88233370 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -17,13 +17,18 @@ 
@click.option("--resampling-method", type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), default="BILINEAR_INTERPOLATION") +@click.option("--cdse-user") +@click.option("--cdse-password") + def run( input: str, output_dir: str, resolution: int, ard_type: str, with_speckle_filter: bool, - resampling_method: str + resampling_method: str, + cdse_user: str, + cdse_password: str, ): # get home folder #home = Path.home() @@ -68,7 +73,7 @@ def run( # print summarising infos about the scene s1.info() - s1.download(output_dir, mirror="5", uname='martin.boettcher@brockmann-consult.de', pword='...') + s1.download(output_dir, mirror="5", uname=cdse_user, pword=cdse_password) # Template ARD parameters From 98ff312786d02a0ca301d92b01a0c08108f35d69 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 11:31:20 +0200 Subject: [PATCH 10/74] add first application script for pre-processing --- ost/app/preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 88233370..f0ad68cf 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -10,8 +10,8 @@ @click.option("--resolution", default=100) @click.option("--ard-type", - type=click.Choice(['OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine']), - default='Earth Engine') + type=click.Choice(['OST_GTC', 'OST-RTC', 'CEOS', 'Earth-Engine']), + default='Earth-Engine') @click.option("--with-speckle-filter", default=False) @click.option("--resampling-method", From 3babde96b5d0adac23639805685b07c1b645f54b Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Tue, 8 Oct 2024 12:25:51 +0200 Subject: [PATCH 11/74] add first application script for pre-processing --- ost/app/preprocessing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index f0ad68cf..f9441764 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -38,6 +38,7 
@@ def run( #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') #output_dir.mkdir(parents=True, exist_ok=True) #print(str(output_dir)) + output_path = Path(output_dir) # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product @@ -106,15 +107,15 @@ def run( print('-----------------------------------------------------------------------------------------------------------') s1.create_ard( - infile=s1.get_path(output_dir), - out_dir=output_dir, + infile=s1.get_path(output_path), + out_dir=output_path, overwrite=True ) print(' The path to our newly created ARD product can be obtained the following way:') print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_dimap}") - s1.create_rgb(outfile = output_dir.joinpath(f'{s1.start_date}.tif')) + s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) print(' The path to our newly created RGB product can be obtained the following way:') print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") From c8ead14de62d4c31f68a8fa785e4eb511db1e305 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 8 Oct 2024 17:13:34 +0200 Subject: [PATCH 12/74] Add CWL file for Notebook 1 workflow --- resources/opensar.cwl | 126 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 resources/opensar.cwl diff --git a/resources/opensar.cwl b/resources/opensar.cwl new file mode 100644 index 00000000..d826e255 --- /dev/null +++ b/resources/opensar.cwl @@ -0,0 +1,126 @@ +cwlVersion: v1.0 +$namespaces: + s: https://schema.org/ +s:softwareVersion: 1.0.0 +schemas: + - http://schema.org/version/9.0/schemaorg-current-http.rdf +$graph: + - class: Workflow + label: OST Notebook 1 + doc: Preprocessing an S1 image with OST + id: main + requirements: [] + inputs: + input: + type: Directory + label: Input S1 GRD + resolution: + type: int + label: Resolution + doc: Resolution in metres + ard-type: + type: + type: enum + symbols: + - OST_GTC + - OST-RTC + - CEOS + - 
Earth-Engine + label: ARD type + doc: Type of analysis-ready data to produce + with-speckle-filter: + type: boolean + label: Speckle filter + doc: Whether to apply a speckle filter + resampling-method: + type: + type: enum + symbols: + - BILINEAR_INTERPOLATION + - BICUBIC_INTERPOLATION + label: Resampling method + doc: Resampling method to use + cdse-user: + type: string + label: CDSE user + doc: CDSE user name + cdse-password: + type: string + label: CDSE password + doc: Password for the specified CDSE user + + outputs: + - id: stac_catalog + outputSource: + - run_script/ost_ard + type: Directory + + steps: + run_script: + run: "#ost_script_1" + in: + input: input + resolution: resolution + ard-type: ard-type + with-speckle-filter: with-speckle-filter + resampling-method: resampling-method + cdse-user: cdse-user + cdse-password: cdse-password + out: + - ost_ard + + - class: CommandLineTool + id: ost_script_1 + requirements: + DockerRequirement: + dockerPull: ost:v1 + + baseCommand: + - python3 + - /usr/local/lib/python3.8/dist-packages/ost/app/preprocessing.py + arguments: [] + inputs: + input: + type: Directory + inputBinding: + position: 1 + resolution: + type: int + inputBinding: + prefix: --resolution + ard-type: + type: + type: enum + symbols: + - OST_GTC + - OST-RTC + - CEOS + - Earth-Engine + inputBinding: + prefix: --ard-type + with-speckle-filter: + type: boolean + inputBinding: + prefix: --with-speckle-filter + resampling-method: + type: + type: enum + symbols: + - BILINEAR_INTERPOLATION + - BICUBIC_INTERPOLATION + inputBinding: + prefix: --resampling-method + cdse-user: + type: string + inputBinding: + prefix: --cdse-user + cdse-password: + type: string + inputBinding: + prefix: --cdse-password + + outputs: + ost_ard: + outputBinding: + glob: . 
+ type: Directory From d1030b9133cba2ff2167fbc7930db4383f76cbab Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 12:13:22 +0200 Subject: [PATCH 13/74] attempt to fix conversion from DIMAP to TIFF --- ost/helpers/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ost/helpers/helpers.py b/ost/helpers/helpers.py index cca5618c..050bb9e3 100644 --- a/ost/helpers/helpers.py +++ b/ost/helpers/helpers.py @@ -102,7 +102,8 @@ def move_dimap(infile_prefix, outfile_prefix, to_tif): if to_tif: - gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) + #gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) + gdal.Translate(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) else: From 990f8c019a7bf1e489e771fd4a121f76507072bb Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 12:14:18 +0200 Subject: [PATCH 14/74] avoid access to CDSE if input is already available --- ost/helpers/copernicus.py | 14 ++++++-------- ost/s1/download.py | 4 +++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index 984a6563..896cf4f0 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -326,17 +326,15 @@ def batch_download( for scene_id in scenes: scene = S1Scene(scene_id) filepath = scene.download_path(download_dir, True) - - try: - uuid = inventory_df["uuid"][inventory_df["identifier"] == scene_id].tolist() - except KeyError: - #uuid = [scene.scihub_uuid(connect(uname=uname, pword=pword, base_url=base_url))] - print("cannot find uuid in inventory " + str(inventory_df)) - raise - if Path(f"{filepath}.downloaded").exists(): logger.debug(f"{scene.scene_id} is already downloaded.") else: + try: + uuid = inventory_df["uuid"][inventory_df["identifier"] == scene_id].tolist() + except KeyError: + #uuid = [scene.scihub_uuid(connect(uname=uname, pword=pword, base_url=base_url))] + 
print("cannot find uuid in inventory " + str(inventory_df)) + raise # create list objects for download download_list.append([uuid[0], filepath, uname, pword, base_url]) diff --git a/ost/s1/download.py b/ost/s1/download.py index 9fbef73e..49ce560f 100644 --- a/ost/s1/download.py +++ b/ost/s1/download.py @@ -120,7 +120,9 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un elif int(mirror) == 4: error_code = onda.check_connection(uname, pword) elif int(mirror) == 5: - error_code = copernicus.check_connection(uname, pword) + # we avoid checking the connection, all products may be downloaded already + #error_code = copernicus.check_connection(uname, pword) + pass # error_code = asf_wget.check_connection(uname, pword) # hidden option for downloading from czech mirror elif int(mirror) == 321: From 5f5bd7d71ad47c64e232fcb3d1b054a4ec85f436 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 12:16:02 +0200 Subject: [PATCH 15/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/app/preprocessing.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index f9441764..417f3edc 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -1,4 +1,5 @@ import sys +import os from pathlib import Path from pprint import pprint from ost import Sentinel1Scene @@ -6,7 +7,6 @@ @click.command() @click.argument("input") -@click.argument("output-dir") @click.option("--resolution", default=100) @click.option("--ard-type", @@ -22,7 +22,6 @@ def run( input: str, - output_dir: str, resolution: int, ard_type: str, with_speckle_filter: bool, @@ -32,13 +31,13 @@ def run( ): # get home folder #home = Path.home() - home = "." 
+ home = Path(".") # create a processing directory #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') #output_dir.mkdir(parents=True, exist_ok=True) #print(str(output_dir)) - output_path = Path(output_dir) + output_path = Path("/home/ost/shared") # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product @@ -50,6 +49,16 @@ def run( #scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' #scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' scene_id = input[input.rfind("/")+1:input.rfind(".")] + year = scene_id[17:21] + month = scene_id[21:23] + day = scene_id[23:25] + os.makedirs(f"SAR/GRD/{year}/{month}/{day}", exist_ok=True) + try: + os.link(input, f"SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") + with open(f"SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w") as f: + f.write("successfully found here") + except: + pass # other scenes with different scene types to process (uncomment) # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen @@ -74,7 +83,7 @@ def run( # print summarising infos about the scene s1.info() - s1.download(output_dir, mirror="5", uname=cdse_user, pword=cdse_password) + s1.download(output_path, mirror="5", uname=cdse_user, pword=cdse_password) # Template ARD parameters @@ -95,6 +104,7 @@ def run( s1.ard_parameters['single_ARD']['resolution'] = resolution # set output resolution to 100m s1.ard_parameters['single_ARD']['remove_speckle'] = with_speckle_filter # apply a speckle filter s1.ard_parameters['single_ARD']['dem']['image_resampling'] = resampling_method # BICUBIC_INTERPOLATION is default + s1.ard_parameters['single_ARD']['to_tif'] = True # s1.ard_parameters['single_ARD']['product_type'] = 'RTC-gamma0' @@ -115,10 +125,10 @@ def run( print(' The path to our newly created ARD product can be obtained the following way:') print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_dimap}") - s1.create_rgb(outfile = 
output_path.joinpath(f'{s1.start_date}.tif')) +# s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) - print(' The path to our newly created RGB product can be obtained the following way:') - print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") +# print(' The path to our newly created RGB product can be obtained the following way:') +# print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") #from ost.helpers.settings import set_log_level #import logging From d1e4963ff9f56a0390df920f3a0a2cdb8fbbbc2c Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 12:25:19 +0200 Subject: [PATCH 16/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/helpers/helpers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ost/helpers/helpers.py b/ost/helpers/helpers.py index 050bb9e3..6939f5ac 100644 --- a/ost/helpers/helpers.py +++ b/ost/helpers/helpers.py @@ -104,6 +104,10 @@ def move_dimap(infile_prefix, outfile_prefix, to_tif): #gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) gdal.Translate(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) + image = gdal.Open(outfile_prefix.with_suffix(".tif"), 1) # 0 = read-only, 1 = read-write. 
+ gdal.SetConfigOption('COMPRESS_OVERVIEW', 'DEFLATE') + image.BuildOverviews('NEAREST', [4, 8, 16, 32, 64, 128], gdal.TermProgress_nocb) + del image else: From 45f1f63d016b7796aa5031a52cfba68f3faabce4 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 12:41:26 +0200 Subject: [PATCH 17/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/app/preprocessing.py | 4 ++-- ost/s1/download.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 417f3edc..4943cad7 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -17,8 +17,8 @@ @click.option("--resampling-method", type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), default="BILINEAR_INTERPOLATION") -@click.option("--cdse-user") -@click.option("--cdse-password") +@click.option("--cdse-user", default="dummy") +@click.option("--cdse-password", default="dummy") def run( input: str, diff --git a/ost/s1/download.py b/ost/s1/download.py index 49ce560f..6d248142 100644 --- a/ost/s1/download.py +++ b/ost/s1/download.py @@ -122,7 +122,7 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un elif int(mirror) == 5: # we avoid checking the connection, all products may be downloaded already #error_code = copernicus.check_connection(uname, pword) - pass + error_code = 200 # error_code = asf_wget.check_connection(uname, pword) # hidden option for downloading from czech mirror elif int(mirror) == 321: From 8b7d2df2d13c71f7a1519c026629a0fd26f8c866 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 13:50:04 +0200 Subject: [PATCH 18/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/app/preprocessing.py | 9 +++++---- ost/helpers/copernicus.py | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 
4943cad7..24dbc656 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -37,7 +37,8 @@ def run( #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') #output_dir.mkdir(parents=True, exist_ok=True) #print(str(output_dir)) - output_path = Path("/home/ost/shared") + output_dir = "/home/ost/shared" + output_path = Path(output_dir) # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product @@ -52,10 +53,10 @@ def run( year = scene_id[17:21] month = scene_id[21:23] day = scene_id[23:25] - os.makedirs(f"SAR/GRD/{year}/{month}/{day}", exist_ok=True) + os.makedirs(f"{output_dir}/SAR/GRD/{year}/{month}/{day}", exist_ok=True) try: - os.link(input, f"SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") - with open(f"SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w") as f: + os.link(input, f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") + with open(f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w") as f: f.write("successfully found here") except: pass diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index 896cf4f0..c57cc295 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -326,7 +326,8 @@ def batch_download( for scene_id in scenes: scene = S1Scene(scene_id) filepath = scene.download_path(download_dir, True) - if Path(f"{filepath}.downloaded").exists(): + logger.info(f"checking path {filepath}") + if Path(f"{filepath[:-4]}.downloaded").exists(): logger.debug(f"{scene.scene_id} is already downloaded.") else: try: From a5edd99fab5d9ca4a0265c6875ce73be0c1e50df Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 14:51:07 +0200 Subject: [PATCH 19/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/helpers/copernicus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index c57cc295..eedd3d08 
100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -327,7 +327,7 @@ def batch_download( scene = S1Scene(scene_id) filepath = scene.download_path(download_dir, True) logger.info(f"checking path {filepath}") - if Path(f"{filepath[:-4]}.downloaded").exists(): + if (filepath.parent() / filepath.stem + "downloaded").exists(): logger.debug(f"{scene.scene_id} is already downloaded.") else: try: From bf13b784ae1511da21b6b56b5e1cf12036d29888 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 15:13:26 +0200 Subject: [PATCH 20/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/helpers/copernicus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index eedd3d08..64cb3050 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -327,7 +327,7 @@ def batch_download( scene = S1Scene(scene_id) filepath = scene.download_path(download_dir, True) logger.info(f"checking path {filepath}") - if (filepath.parent() / filepath.stem + "downloaded").exists(): + if (filepath.parent / filepath.stem + ".downloaded").exists(): logger.debug(f"{scene.scene_id} is already downloaded.") else: try: From 343289ee016fd598f0f6429c7c4da119fd893779 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 15:48:31 +0200 Subject: [PATCH 21/74] run in mounted external dir, create input dir structure, attempt to generate tiff --- ost/helpers/copernicus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index 64cb3050..629eae8d 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -327,7 +327,7 @@ def batch_download( scene = S1Scene(scene_id) filepath = scene.download_path(download_dir, True) logger.info(f"checking path {filepath}") - if (filepath.parent / filepath.stem + ".downloaded").exists(): + if (filepath.parent / (filepath.stem + 
".downloaded")).exists(): logger.debug(f"{scene.scene_id} is already downloaded.") else: try: From 0a37ca5479bd55d621197222da506cff788eacf4 Mon Sep 17 00:00:00 2001 From: martin-boettcher Date: Wed, 9 Oct 2024 22:11:02 +0200 Subject: [PATCH 22/74] generate tiff, place DEMs in working dir outside of container --- ost/app/preprocessing.py | 2 +- ost/generic/common_wrappers.py | 46 ++++++++++++++++++++ ost/helpers/helpers.py | 22 ++++++++-- resources/Dockerfile | 79 ++++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 4 deletions(-) create mode 100644 resources/Dockerfile diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 24dbc656..1710595e 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -124,7 +124,7 @@ def run( ) print(' The path to our newly created ARD product can be obtained the following way:') - print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_dimap}") + print(f"{s1.ard_dimap}") # s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) diff --git a/ost/generic/common_wrappers.py b/ost/generic/common_wrappers.py index da3e28fe..9cad7fbf 100644 --- a/ost/generic/common_wrappers.py +++ b/ost/generic/common_wrappers.py @@ -7,6 +7,7 @@ from ost.helpers import helpers as h from ost.helpers.settings import GPT_FILE, OST_ROOT from ost.helpers.errors import GPTRuntimeError, NotValidFileError +from multiprocessing import cpu_count logger = logging.getLogger(__name__) @@ -416,3 +417,48 @@ def mt_speckle_filter(in_stack, out_stack, logfile, config_dict): return str(out_stack.with_suffix(".dim")) else: raise NotValidFileError(f"Product did not pass file check: {return_code}") + + +@retry(stop_max_attempt_number=3, wait_fixed=1) +def convert_to_tiff( + input, + output, + logfile, +): + """ + Converts product into TIFF format + :param input: path to a product in BEAM-DIMAP format + :param output: path to the target TIFF file + :param logfile: SNAP logfile + :param config_dict: + :return: + """ + + # get 
relevant config parameters + cpus = cpu_count() # config_dict["snap_cpu_parallelism"] + + logger.debug("Converting to GeoTIFF") + + command = ( + f"{GPT_FILE} Subset " + f"-t {output} " + f"-f GeoTIFF " + f"-x -q {cpus} " + f"{input}" + ) + return_code = h.run_command(command, logfile) + + if return_code == 0: + logger.debug(f"Successfully created TIFF file {output}") + else: + raise GPTRuntimeError( + f"TIFF conversion of {input} exited with error {return_code}. " + f"See {logfile} for Snap's error message." + ) + + # do check routine + return_msg = h.check_out_tiff(output) + if return_msg == 0: + logger.debug("Product passed validity check.") + else: + raise NotValidFileError(f"Product did not pass file check: {return_msg}") diff --git a/ost/helpers/helpers.py b/ost/helpers/helpers.py index 6939f5ac..e51e0832 100644 --- a/ost/helpers/helpers.py +++ b/ost/helpers/helpers.py @@ -16,6 +16,7 @@ from pathlib import Path from datetime import timedelta from osgeo import gdal +from ost.generic.common_wrappers import convert_to_tiff logger = logging.getLogger(__name__) @@ -103,8 +104,23 @@ def move_dimap(infile_prefix, outfile_prefix, to_tif): if to_tif: #gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) - gdal.Translate(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) - image = gdal.Open(outfile_prefix.with_suffix(".tif"), 1) # 0 = read-only, 1 = read-write. + convert_to_tiff( + infile_prefix.with_suffix(".dim"), + infile_prefix.with_suffix(".uncompressed.tif"), + outfile_prefix.with_suffix(".log"), + ) + # TODO use COG output format instead of writing GTiff and update afterwards + gdal.Translate( + str(outfile_prefix.with_suffix(".tif")), + str(infile_prefix.with_suffix(".uncompressed.tif")), + creationOptions={ + "TILED": "YES", + "BLOCKXSIZE": "512", + "BLOCKYSIZE": "512", + "COMPRESS": "DEFLATE", + } + ) + image = gdal.Open(str(outfile_prefix.with_suffix(".tif")), 1) # 0 = read-only, 1 = read-write. 
gdal.SetConfigOption('COMPRESS_OVERVIEW', 'DEFLATE') image.BuildOverviews('NEAREST', [4, 8, 16, 32, 64, 128], gdal.TermProgress_nocb) del image @@ -185,7 +201,7 @@ def check_out_tiff(file, test_stats=True): if test_stats: # open the file ds = gdal.Open(str(file)) - stats = ds.GetRasterBand(1).GetStatistics(0, 1) + stats = ds.GetRasterBand(1).ComputeStatistics(False) # if difference of min and max is 0 and mean are all 0 if stats[1] - stats[0] == 0 and stats[2] == 0: diff --git a/resources/Dockerfile b/resources/Dockerfile new file mode 100644 index 00000000..5f3b0944 --- /dev/null +++ b/resources/Dockerfile @@ -0,0 +1,79 @@ +FROM ubuntu:20.04 + +LABEL maintainer="Andreas Vollrath, FAO" +LABEL OpenSARToolkit='0.12.3' + +# set work directory to home and download snap +WORKDIR /home/ost + +# copy the snap installation config file into the container +COPY snap.varfile $HOME + +# update variables +ENV OTB_VERSION="7.3.0" \ + TBX_VERSION="8" \ + TBX_SUBVERSION="0" +ENV TBX="esa-snap_sentinel_unix_${TBX_VERSION}_${TBX_SUBVERSION}.sh" \ + SNAP_URL="http://step.esa.int/downloads/${TBX_VERSION}.${TBX_SUBVERSION}/installers" \ + OTB=OTB-${OTB_VERSION}-Linux64.run \ + HOME=/home/ost \ + PATH=$PATH:/home/ost/programs/snap/bin:/home/ost/programs/OTB-${OTB_VERSION}-Linux64/bin + +RUN apt-get update && apt-get install -yq wget libquadmath0 sudo + +RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/libgfortran3_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i libgfortran3_6.4.0-17ubuntu1_amd64.deb + +# install all dependencies +RUN groupadd -r ost && \ + useradd -r -g ost ost && \ + apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq \ + python3 \ + python3-pip \ + git \ + libgdal-dev \ + python3-gdal \ + libspatialindex-dev \ + wget \ + unzip \ + imagemagick \ + nodejs \ + npm + +RUN alias python=python3 
&& \ + rm -rf /var/lib/apt/lists/* && \ + python3 -m pip install jupyterlab && \ + mkdir /home/ost/programs && \ + wget $SNAP_URL/$TBX && \ + chmod +x $TBX && \ + ./$TBX -q -varfile snap.varfile && \ + rm $TBX && \ + rm snap.varfile && \ + cd /home/ost/programs && \ + wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} && \ + chmod +x $OTB && \ + ./${OTB} && \ + rm -f OTB-${OTB_VERSION}-Linux64.run + +# update snap to latest version +RUN /home/ost/programs/snap/bin/snap --nosplash --nogui --modules --update-all 2>&1 | while read -r line; do \ + echo "$line" && \ + [ "$line" = "updates=0" ] && sleep 2 && pkill -TERM -f "snap/jre/bin/java"; \ + done; exit 0 + +# set usable memory to 12G +RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions + +COPY constraints.txt $HOME + +# get OST and tutorials +RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git -c constraints.txt + +#RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager +#RUN jupyter nbextension enable --py widgetsnbextension +#RUN pip install widgetsnbextension + +#EXPOSE 8888 +#CMD jupyter lab --ip='0.0.0.0' --port=8888 --no-browser --allow-root From 2eaef4929a646783c4ea0db2d2105f30d53cec14 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 11 Oct 2024 17:39:54 +0200 Subject: [PATCH 23/74] Add STAC input and output to preprocessing.py preprocessing.py now conforms to EO Application Package best practice by interpreting its input not as a direct path to a data file but as the path to a directory containing a STAC catalog containing an item containing an asset giving the path to the actual input. Similarly, after processing, preprocessing.py also writes a STAC catalog describing the output data. 
--- ost/app/preprocessing.py | 57 ++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + requirements.txt | 1 + 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 1710595e..f7a03a07 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -1,9 +1,12 @@ +from datetime import datetime import sys import os +import pathlib from pathlib import Path from pprint import pprint from ost import Sentinel1Scene import click +import pystac @click.command() @click.argument("input") @@ -49,13 +52,18 @@ def run( # NOTE:only available via ASF data mirror #scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' #scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' - scene_id = input[input.rfind("/")+1:input.rfind(".")] + + # We expect input to be the path to a directory containing a STAC catalog + # which will lead us to the actual input zip. + input_path = get_zip_from_stac(input) + + scene_id = input_path[input_path.rfind("/")+1:input_path.rfind(".")] year = scene_id[17:21] month = scene_id[21:23] day = scene_id[23:25] os.makedirs(f"{output_dir}/SAR/GRD/{year}/{month}/{day}", exist_ok=True) try: - os.link(input, f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") + os.link(input_path, f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") with open(f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w") as f: f.write("successfully found here") except: @@ -126,6 +134,9 @@ def run( print(' The path to our newly created ARD product can be obtained the following way:') print(f"{s1.ard_dimap}") + # Write a STAC catalog and item pointing to the output product. 
+ write_stac_for_dimap(input, str(s1.ard_dimap)) + # s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) # print(' The path to our newly created RGB product can be obtained the following way:') @@ -135,6 +146,48 @@ def run( #import logging #set_log_level(logging.DEBUG) + +def get_zip_from_stac(stac_root: str) -> str: + stac_path = pathlib.Path(stac_root) + catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) + item_links = [link for link in catalog.links if link.rel == "item"] + assert(len(item_links) == 1) + item_link = item_links[0] + item = pystac.Item.from_file(str(stac_path / item_link.href)) + zip_assets = [ + asset for asset in item.assets.values() + if asset.media_type == "application/zip" + ] + assert(len(zip_assets) == 1) + zip_asset = zip_assets[0] + zip_path = stac_path / zip_asset.href + return str(zip_path) + + +def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: + asset = pystac.Asset( + roles=["data"], + href=dimap_path, + media_type="application/dimap" + ) + item = pystac.Item( + id="result-item", + # TODO use actual geometry and datetime + geometry=None, + bbox=None, + datetime=datetime.fromisoformat("2000-01-01T00:00:00+00:00"), + properties={}, # datetime will be filled in automatically + assets={"DIMAP": asset} + ) + catalog = pystac.Catalog( + id="catalog", + description="Root catalog", + href=f"{stac_root}/catalog.json" + ) + catalog.add_item(item) + catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) + + import click if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 95d059ee..a305cc36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "numpy", "pandas", "psycopg2-binary", + "pystac", "rasterio", "requests", "scipy", diff --git a/requirements.txt b/requirements.txt index 5892fc8e..ff2bb2d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ matplotlib==3.7.5 numpy==1.24.4 pandas==1.5.3 psycopg2-binary==2.9.9 
+pystac rasterio==1.3.10 requests==2.32.3 scipy==1.10.1 From c7f77b4dd73e3ef75f749d67d3f68688eb31d97f Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 11:35:12 +0100 Subject: [PATCH 24/74] preprocessing.py: reformat code --- ost/app/preprocessing.py | 137 +++++++++++++++++++++++---------------- 1 file changed, 82 insertions(+), 55 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index f7a03a07..3170a272 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -8,21 +8,23 @@ import click import pystac + @click.command() @click.argument("input") -@click.option("--resolution", - default=100) -@click.option("--ard-type", - type=click.Choice(['OST_GTC', 'OST-RTC', 'CEOS', 'Earth-Engine']), - default='Earth-Engine') -@click.option("--with-speckle-filter", - default=False) -@click.option("--resampling-method", - type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), - default="BILINEAR_INTERPOLATION") +@click.option("--resolution", default=100) +@click.option( + "--ard-type", + type=click.Choice(["OST_GTC", "OST-RTC", "CEOS", "Earth-Engine"]), + default="Earth-Engine", +) +@click.option("--with-speckle-filter", default=False) +@click.option( + "--resampling-method", + type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), + default="BILINEAR_INTERPOLATION", +) @click.option("--cdse-user", default="dummy") @click.option("--cdse-password", default="dummy") - def run( input: str, resolution: int, @@ -33,38 +35,44 @@ def run( cdse_password: str, ): # get home folder - #home = Path.home() + # home = Path.home() home = Path(".") # create a processing directory - #output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') - #output_dir.mkdir(parents=True, exist_ok=True) - #print(str(output_dir)) + # output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') + # output_dir.mkdir(parents=True, exist_ok=True) + # print(str(output_dir)) output_dir = "/home/ost/shared" output_path = 
Path(output_dir) # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product - #--------------------------------------------------- + # --------------------------------------------------- # Some scenes to choose from # very first IW (VV/VH) S1 image available over Istanbul/Turkey # NOTE:only available via ASF data mirror - #scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' - #scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' + # scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' + # scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' # We expect input to be the path to a directory containing a STAC catalog # which will lead us to the actual input zip. input_path = get_zip_from_stac(input) - scene_id = input_path[input_path.rfind("/")+1:input_path.rfind(".")] + scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] year = scene_id[17:21] month = scene_id[21:23] day = scene_id[23:25] os.makedirs(f"{output_dir}/SAR/GRD/{year}/{month}/{day}", exist_ok=True) try: - os.link(input_path, f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip") - with open(f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w") as f: + os.link( + input_path, + f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", + ) + with open( + f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", + mode="w", + ) as f: f.write("successfully found here") except: pass @@ -84,7 +92,7 @@ def run( # Stripmap mode S5 scene (dual-polarised VV/VH) over Germany # scene_id = 'S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB' - #--------------------------------------------------- + # --------------------------------------------------- # create an S1Scene instance s1 = Sentinel1Scene(scene_id) @@ -99,66 +107,88 @@ def run( # we change ARD type # possible choices 
are: # 'OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine' - #s1.update_ard_parameters('Earth-Engine') + # s1.update_ard_parameters('Earth-Engine') s1.update_ard_parameters(ard_type) - print('-----------------------------------------------------------------------------------------------------------') - print('Dictionary of Earth Engine ARD parameters:') - print('-----------------------------------------------------------------------------------------------------------') - pprint(s1.ard_parameters['single_ARD']) - print('-----------------------------------------------------------------------------------------------------------') + print( + "-----------------------------------------------------------------------------------------------------------" + ) + print("Dictionary of Earth Engine ARD parameters:") + print( + "-----------------------------------------------------------------------------------------------------------" + ) + pprint(s1.ard_parameters["single_ARD"]) + print( + "-----------------------------------------------------------------------------------------------------------" + ) # Customised ARD parameters # we cusomize the resolution and image resampling - s1.ard_parameters['single_ARD']['resolution'] = resolution # set output resolution to 100m - s1.ard_parameters['single_ARD']['remove_speckle'] = with_speckle_filter # apply a speckle filter - s1.ard_parameters['single_ARD']['dem']['image_resampling'] = resampling_method # BICUBIC_INTERPOLATION is default - s1.ard_parameters['single_ARD']['to_tif'] = True + s1.ard_parameters["single_ARD"][ + "resolution" + ] = resolution # set output resolution to 100m + s1.ard_parameters["single_ARD"][ + "remove_speckle" + ] = with_speckle_filter # apply a speckle filter + s1.ard_parameters["single_ARD"]["dem"][ + "image_resampling" + ] = resampling_method # BICUBIC_INTERPOLATION is default + s1.ard_parameters["single_ARD"]["to_tif"] = True # s1.ard_parameters['single_ARD']['product_type'] = 'RTC-gamma0' # uncomment this for 
the Azores EW scene # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' - print('-----------------------------------------------------------------------------------------------------------') - print('Dictionary of our customised ARD parameters for the final scene processing:') - print('-----------------------------------------------------------------------------------------------------------') - pprint(s1.ard_parameters['single_ARD']) - print('-----------------------------------------------------------------------------------------------------------') + print( + "-----------------------------------------------------------------------------------------------------------" + ) + print( + "Dictionary of our customised ARD parameters for the final scene processing:" + ) + print( + "-----------------------------------------------------------------------------------------------------------" + ) + pprint(s1.ard_parameters["single_ARD"]) + print( + "-----------------------------------------------------------------------------------------------------------" + ) s1.create_ard( - infile=s1.get_path(output_path), - out_dir=output_path, - overwrite=True + infile=s1.get_path(output_path), out_dir=output_path, overwrite=True ) - print(' The path to our newly created ARD product can be obtained the following way:') + print( + " The path to our newly created ARD product can be obtained the following way:" + ) print(f"{s1.ard_dimap}") # Write a STAC catalog and item pointing to the output product. 
- write_stac_for_dimap(input, str(s1.ard_dimap)) + write_stac_for_dimap(".", str(s1.ard_dimap)) # TODO change to .tif + # s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) # print(' The path to our newly created RGB product can be obtained the following way:') # print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") -#from ost.helpers.settings import set_log_level -#import logging -#set_log_level(logging.DEBUG) +# from ost.helpers.settings import set_log_level +# import logging +# set_log_level(logging.DEBUG) def get_zip_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) item_links = [link for link in catalog.links if link.rel == "item"] - assert(len(item_links) == 1) + assert len(item_links) == 1 item_link = item_links[0] item = pystac.Item.from_file(str(stac_path / item_link.href)) zip_assets = [ - asset for asset in item.assets.values() - if asset.media_type == "application/zip" + asset + for asset in item.assets.values() + if asset.media_type == "application/zip" ] - assert(len(zip_assets) == 1) + assert len(zip_assets) == 1 zip_asset = zip_assets[0] zip_path = stac_path / zip_asset.href return str(zip_path) @@ -166,9 +196,7 @@ def get_zip_from_stac(stac_root: str) -> str: def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: asset = pystac.Asset( - roles=["data"], - href=dimap_path, - media_type="application/dimap" + roles=["data"], href=dimap_path, media_type="application/dimap" ) item = pystac.Item( id="result-item", @@ -177,12 +205,12 @@ def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: bbox=None, datetime=datetime.fromisoformat("2000-01-01T00:00:00+00:00"), properties={}, # datetime will be filled in automatically - assets={"DIMAP": asset} + assets={"DIMAP": asset}, ) catalog = pystac.Catalog( id="catalog", description="Root catalog", - href=f"{stac_root}/catalog.json" + href=f"{stac_root}/catalog.json", ) catalog.add_item(item) 
catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) @@ -192,4 +220,3 @@ def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: if __name__ == "__main__": sys.exit(run()) - From eea07968ffe3a382b93439df17e2ba3b33e8c7c9 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 12:25:06 +0100 Subject: [PATCH 25/74] Refactor and tidy up preprocessing.py --- ost/app/preprocessing.py | 112 ++++++++++++++------------------------- 1 file changed, 40 insertions(+), 72 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 3170a272..2f388ab3 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -3,7 +3,7 @@ import os import pathlib from pathlib import Path -from pprint import pprint +import pprint from ost import Sentinel1Scene import click import pystac @@ -34,9 +34,24 @@ def run( cdse_user: str, cdse_password: str, ): - # get home folder - # home = Path.home() - home = Path(".") + horizontal_line = "-" * 80 + + scene_presets = { + # very first IW (VV/VH) S1 image available over Istanbul/Turkey + # NOTE:only available via ASF data mirror + "istanbul": "S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04", + "unknown": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen + "spitzbergen": "S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85", + # IW scene (single-polarised VV) over Ecuadorian Amazon + "ecuador": "S1A_IW_GRDH_1SSV_20150205T232009_20150205T232034_004494_00583A_1C80", + # EW scene (dual-polarised VV/VH) over Azores (needs a different DEM, see ARD parameters below) + "azores": "S1B_EW_GRDM_1SDV_20200303T193150_20200303T193250_020532_026E82_5CE9", + # EW scene (dual-polarised HH/HV) over Greenland + "greenland": "S1B_EW_GRDM_1SDH_20200511T205319_20200511T205419_021539_028E4E_697E", + # Stripmap mode S5 scene (dual-polarised VV/VH) over Germany + "germany": 
"S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB", + } # create a processing directory # output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') @@ -45,16 +60,6 @@ def run( output_dir = "/home/ost/shared" output_path = Path(output_dir) - # create a S1Scene class instance based on the scene identifier of the first ever Dual-Pol Sentinel-1 IW product - - # --------------------------------------------------- - # Some scenes to choose from - - # very first IW (VV/VH) S1 image available over Istanbul/Turkey - # NOTE:only available via ASF data mirror - # scene_id = 'S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04' - # scene_id = 'S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB' - # We expect input to be the path to a directory containing a STAC catalog # which will lead us to the actual input zip. input_path = get_zip_from_stac(input) @@ -77,24 +82,7 @@ def run( except: pass - # other scenes with different scene types to process (uncomment) - # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen - # scene_id = 'S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85' - - # IW scene (single-polarised VV) over Ecuadorian Amazon - # scene_id = 'S1A_IW_GRDH_1SSV_20150205T232009_20150205T232034_004494_00583A_1C80' - - # EW scene (dual-polarised VV/VH) over Azores (needs a different DEM, see cell of ARD parameters below) - # scene_id = 'S1B_EW_GRDM_1SDV_20200303T193150_20200303T193250_020532_026E82_5CE9' - - # EW scene (dual-polarised HH/HV) over Greenland - # scene_id = 'S1B_EW_GRDM_1SDH_20200511T205319_20200511T205419_021539_028E4E_697E' - - # Stripmap mode S5 scene (dual-polarised VV/VH) over Germany - # scene_id = 'S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB' - # --------------------------------------------------- - - # create an S1Scene instance + # create a S1Scene class instance based on the specified scene identifier s1 = Sentinel1Scene(scene_id) # print summarising 
infos about the scene @@ -102,63 +90,45 @@ def run( s1.download(output_path, mirror="5", uname=cdse_user, pword=cdse_password) + single_ard = s1.ard_parameters["single_ARD"] + # Template ARD parameters - # we change ARD type - # possible choices are: - # 'OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine' - # s1.update_ard_parameters('Earth-Engine') + # Set ARD type. Choices: 'OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine' s1.update_ard_parameters(ard_type) print( - "-----------------------------------------------------------------------------------------------------------" + f"{horizontal_line}\n" + f"Dictionary of Earth Engine ARD parameters:\n" + f"{horizontal_line}\n" + f"{pprint.pformat(single_ard)}\n" + f"{horizontal_line}" ) - print("Dictionary of Earth Engine ARD parameters:") - print( - "-----------------------------------------------------------------------------------------------------------" - ) - pprint(s1.ard_parameters["single_ARD"]) - print( - "-----------------------------------------------------------------------------------------------------------" - ) - - # Customised ARD parameters - # we cusomize the resolution and image resampling - s1.ard_parameters["single_ARD"][ - "resolution" - ] = resolution # set output resolution to 100m - s1.ard_parameters["single_ARD"][ - "remove_speckle" - ] = with_speckle_filter # apply a speckle filter - s1.ard_parameters["single_ARD"]["dem"][ + # Customize ARD parameters + single_ard["resolution"] = resolution + single_ard["remove_speckle"] = with_speckle_filter + single_ard["dem"][ "image_resampling" - ] = resampling_method # BICUBIC_INTERPOLATION is default - s1.ard_parameters["single_ARD"]["to_tif"] = True - - # s1.ard_parameters['single_ARD']['product_type'] = 'RTC-gamma0' + ] = resampling_method # default: BICUBIC_INTERPOLATION + single_ard["to_tif"] = True + # single_ard['product_type'] = 'RTC-gamma0' # uncomment this for the Azores EW scene # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' - print( 
- "-----------------------------------------------------------------------------------------------------------" - ) + print(horizontal_line) print( "Dictionary of our customised ARD parameters for the final scene processing:" ) - print( - "-----------------------------------------------------------------------------------------------------------" - ) - pprint(s1.ard_parameters["single_ARD"]) - print( - "-----------------------------------------------------------------------------------------------------------" - ) + print(horizontal_line) + pprint.pprint(single_ard) + print(horizontal_line) s1.create_ard( infile=s1.get_path(output_path), out_dir=output_path, overwrite=True ) print( - " The path to our newly created ARD product can be obtained the following way:" + "The path to our newly created ARD product can be obtained the following way:" ) print(f"{s1.ard_dimap}") @@ -216,7 +186,5 @@ def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) -import click - if __name__ == "__main__": sys.exit(run()) From 254befb0443d85e1a5005de204e3385cfe5dff4e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 13:12:32 +0100 Subject: [PATCH 26/74] preprocessing.py: more refactoring --- ost/app/preprocessing.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 2f388ab3..853ffecc 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -34,12 +34,13 @@ def run( cdse_user: str, cdse_password: str, ): - horizontal_line = "-" * 80 + horizontal_line = "-" * 79 scene_presets = { # very first IW (VV/VH) S1 image available over Istanbul/Turkey # NOTE:only available via ASF data mirror "istanbul": "S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04", + # ??? 
"unknown": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen "spitzbergen": "S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85", @@ -115,32 +116,27 @@ def run( # uncomment this for the Azores EW scene # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' - print(horizontal_line) + print( - "Dictionary of our customised ARD parameters for the final scene processing:" + f"{horizontal_line}\n", + "Dictionary of customized ARD parameters for final scene processing:\n" + f"{horizontal_line}\n", + f"{pprint.pformat(single_ard)}\n", + f"{horizontal_line}", ) - print(horizontal_line) - pprint.pprint(single_ard) - print(horizontal_line) s1.create_ard( infile=s1.get_path(output_path), out_dir=output_path, overwrite=True ) - - print( - "The path to our newly created ARD product can be obtained the following way:" - ) - print(f"{s1.ard_dimap}") + print(f"Path to newly created ARD product: {s1.ard_dimap}") + # s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) + # print("Path to newly created RGB product:") + # print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") # Write a STAC catalog and item pointing to the output product. write_stac_for_dimap(".", str(s1.ard_dimap)) # TODO change to .tif -# s1.create_rgb(outfile = output_path.joinpath(f'{s1.start_date}.tif')) - -# print(' The path to our newly created RGB product can be obtained the following way:') -# print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") - # from ost.helpers.settings import set_log_level # import logging # set_log_level(logging.DEBUG) From 85d6daee72e65b9b3b09dd2870f58f7d97e7f85f Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 13:55:44 +0100 Subject: [PATCH 27/74] preprocessing.py: improve logging Logging is now done with the standard Python logging library, not with print statements. 
--- ost/app/preprocessing.py | 43 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 853ffecc..8c8e4a12 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -4,10 +4,14 @@ import pathlib from pathlib import Path import pprint +import logging + from ost import Sentinel1Scene import click import pystac +LOGGER = logging.getLogger(__name__) + @click.command() @click.argument("input") @@ -36,6 +40,12 @@ def run( ): horizontal_line = "-" * 79 + logging.basicConfig(level=logging.INFO) + + # from ost.helpers.settings import set_log_level + # import logging + # set_log_level(logging.DEBUG) + scene_presets = { # very first IW (VV/VH) S1 image available over Istanbul/Turkey # NOTE:only available via ASF data mirror @@ -80,24 +90,20 @@ def run( mode="w", ) as f: f.write("successfully found here") - except: - pass + except Exception as e: + LOGGER.warning("Exception linking input data", exc_info=e) - # create a S1Scene class instance based on the specified scene identifier + # Instantiate a Sentinel1Scene from the specified scene identifier s1 = Sentinel1Scene(scene_id) - # print summarising infos about the scene - s1.info() + s1.info() # write scene summary information to stdout s1.download(output_path, mirror="5", uname=cdse_user, pword=cdse_password) single_ard = s1.ard_parameters["single_ARD"] - - # Template ARD parameters - - # Set ARD type. Choices: 'OST_GTC', 'OST-RTC', 'CEOS', 'Earth Engine' + # Set ARD type. 
Choices: "OST_GTC", "OST-RTC", "CEOS", "Earth Engine" s1.update_ard_parameters(ard_type) - print( + LOGGER.info( f"{horizontal_line}\n" f"Dictionary of Earth Engine ARD parameters:\n" f"{horizontal_line}\n" @@ -117,18 +123,18 @@ def run( # uncomment this for the Azores EW scene # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' - print( - f"{horizontal_line}\n", + LOGGER.info( + f"{horizontal_line}\n" "Dictionary of customized ARD parameters for final scene processing:\n" - f"{horizontal_line}\n", - f"{pprint.pformat(single_ard)}\n", - f"{horizontal_line}", + f"{horizontal_line}\n" + f"{pprint.pformat(single_ard)}\n" + f"{horizontal_line}" ) s1.create_ard( infile=s1.get_path(output_path), out_dir=output_path, overwrite=True ) - print(f"Path to newly created ARD product: {s1.ard_dimap}") + LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") # s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) # print("Path to newly created RGB product:") # print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") @@ -137,11 +143,6 @@ def run( write_stac_for_dimap(".", str(s1.ard_dimap)) # TODO change to .tif -# from ost.helpers.settings import set_log_level -# import logging -# set_log_level(logging.DEBUG) - - def get_zip_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) From a908e7066ce80adca10c57c734a69dd5120b5b3e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 14:18:10 +0100 Subject: [PATCH 28/74] preprocessing.py: output to CWD, not /home/ost/shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit preprocessing.py now writes its output to the CWD set on container start-up, as prescribed in OGC EOAP BP §7.2. 
--- ost/app/preprocessing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 8c8e4a12..d832ebcd 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -38,7 +38,7 @@ def run( cdse_user: str, cdse_password: str, ): - horizontal_line = "-" * 79 + horizontal_line = "-" * 79 # Used in log output logging.basicConfig(level=logging.INFO) @@ -65,14 +65,17 @@ def run( } # create a processing directory - # output_dir = home.joinpath('OST_Tutorials', 'Tutorial_1') # output_dir.mkdir(parents=True, exist_ok=True) # print(str(output_dir)) - output_dir = "/home/ost/shared" + + # "When executed, the Application working directory is also the Application + # output directory. Any file created by the Application should be added + # under that directory." -- https://docs.ogc.org/bp/20-089r1.html#toc20 + output_dir = os.getcwd() output_path = Path(output_dir) # We expect input to be the path to a directory containing a STAC catalog - # which will lead us to the actual input zip. + # containing an item which links to the input zip as an asset. 
input_path = get_zip_from_stac(input) scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] From 6fe38a3716e1d828c99171d643ee59f0f89590d9 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 30 Oct 2024 17:12:23 +0100 Subject: [PATCH 29/74] preprocessing.py: output TIFF, not DIMAP --- ost/app/preprocessing.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index d832ebcd..6e66892a 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -41,7 +41,6 @@ def run( horizontal_line = "-" * 79 # Used in log output logging.basicConfig(level=logging.INFO) - # from ost.helpers.settings import set_log_level # import logging # set_log_level(logging.DEBUG) @@ -64,10 +63,6 @@ def run( "germany": "S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB", } - # create a processing directory - # output_dir.mkdir(parents=True, exist_ok=True) - # print(str(output_dir)) - # "When executed, the Application working directory is also the Application # output directory. Any file created by the Application should be added # under that directory." -- https://docs.ogc.org/bp/20-089r1.html#toc20 @@ -98,9 +93,7 @@ def run( # Instantiate a Sentinel1Scene from the specified scene identifier s1 = Sentinel1Scene(scene_id) - s1.info() # write scene summary information to stdout - s1.download(output_path, mirror="5", uname=cdse_user, pword=cdse_password) single_ard = s1.ard_parameters["single_ARD"] @@ -134,16 +127,16 @@ def run( f"{horizontal_line}" ) + # This seems to be a prerequisite for create_rgb. 
s1.create_ard( infile=s1.get_path(output_path), out_dir=output_path, overwrite=True ) + s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") - # s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) - # print("Path to newly created RGB product:") - # print(f"CALVALUS_OUTPUT_PRODUCT {s1.ard_rgb}") + print(f"Path to newly created RGB product: {s1.ard_rgb}") # Write a STAC catalog and item pointing to the output product. - write_stac_for_dimap(".", str(s1.ard_dimap)) # TODO change to .tif + write_stac_for_tiff(".", str(s1.ard_rgb)) def get_zip_from_stac(stac_root: str) -> str: @@ -161,12 +154,15 @@ def get_zip_from_stac(stac_root: str) -> str: assert len(zip_assets) == 1 zip_asset = zip_assets[0] zip_path = stac_path / zip_asset.href + LOGGER.info(f"Found input zip at {zip_path}") return str(zip_path) -def write_stac_for_dimap(stac_root: str, dimap_path: str) -> None: +def write_stac_for_tiff(stac_root: str, asset_path: str) -> None: asset = pystac.Asset( - roles=["data"], href=dimap_path, media_type="application/dimap" + roles=["data"], + href=asset_path, + media_type="image/tiff; application=geotiff;", ) item = pystac.Item( id="result-item", From 80228c26b063d74e5b79c57be0b7afe496ca9a8d Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 09:24:30 +0100 Subject: [PATCH 30/74] preprocessing: copy input data if link fails Linking between input and output directories doesn't work if they're on different file systems. 
--- ost/app/preprocessing.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 6e66892a..d1412a89 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -5,6 +5,7 @@ from pathlib import Path import pprint import logging +import shutil from ost import Sentinel1Scene import click @@ -55,7 +56,8 @@ def run( "spitzbergen": "S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85", # IW scene (single-polarised VV) over Ecuadorian Amazon "ecuador": "S1A_IW_GRDH_1SSV_20150205T232009_20150205T232034_004494_00583A_1C80", - # EW scene (dual-polarised VV/VH) over Azores (needs a different DEM, see ARD parameters below) + # EW scene (dual-polarised VV/VH) over Azores + # (needs a different DEM,see ARD parameters below) "azores": "S1B_EW_GRDM_1SDV_20200303T193150_20200303T193250_020532_026E82_5CE9", # EW scene (dual-polarised HH/HV) over Greenland "greenland": "S1B_EW_GRDM_1SDH_20200511T205319_20200511T205419_021539_028E4E_697E", @@ -79,10 +81,18 @@ def run( day = scene_id[23:25] os.makedirs(f"{output_dir}/SAR/GRD/{year}/{month}/{day}", exist_ok=True) try: - os.link( - input_path, - f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", - ) + try: + os.link( + input_path, + f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", + ) + except OSError as e: + LOGGER.warning("Exception linking input data", exc_info=e) + LOGGER.warning("Attempting to copy instead.") + shutil.copy2( + input_path, + f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", + ) with open( f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", mode="w", From 7cd322a6c152d7c6c6f2ebd482ac65786cb01f9f Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 14:34:40 +0100 Subject: [PATCH 31/74] preprocessing: add --dry-run option for testing --- ost/app/preprocessing.py | 51 +++++++++++++++++++++++++++++++++------- 1 file changed, 42 
insertions(+), 9 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index d1412a89..b7251242 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -30,6 +30,11 @@ ) @click.option("--cdse-user", default="dummy") @click.option("--cdse-password", default="dummy") +@click.option( + "--dry-run", default=False, + help="Skip processing and write a placeholder output file instead. " + "Useful for testing." +) def run( input: str, resolution: int, @@ -38,6 +43,7 @@ def run( resampling_method: str, cdse_user: str, cdse_password: str, + dry_run: bool ): horizontal_line = "-" * 79 # Used in log output @@ -137,17 +143,44 @@ def run( f"{horizontal_line}" ) - # This seems to be a prerequisite for create_rgb. - s1.create_ard( - infile=s1.get_path(output_path), out_dir=output_path, overwrite=True - ) - s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) - LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") - print(f"Path to newly created RGB product: {s1.ard_rgb}") + if dry_run: + tiff_path = output_path / f"{s1.start_date}.tif" + LOGGER.info("Dry run -- creating dummy output at {tiff_path}") + create_dummy_tiff(tiff_path) + else: + LOGGER.info(f"Creating ARD at {output_path}") + # This seems to be a prerequisite for create_rgb. + s1.create_ard( + infile=s1.get_path(output_path), out_dir=output_path, overwrite=True + ) + LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") + LOGGER.info(f"Creating RGB at {output_path}") + s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) + tiff_path = s1.ard_rgb + LOGGER.info(f"Path to newly created RGB product: {tiff_path}") # Write a STAC catalog and item pointing to the output product. 
- write_stac_for_tiff(".", str(s1.ard_rgb)) - + LOGGER.info("Writing STAC catalogue and item") + write_stac_for_tiff(".", str(tiff_path)) + + +def create_dummy_tiff(path: Path) -> None: + import numpy as np + import rasterio + + data = np.linspace(np.arange(100), 50 * np.sin(np.arange(100)), 100) + with rasterio.open( + str(path), + 'w', + driver='GTiff', + height=data.shape[0], + width=data.shape[1], + count=1, + dtype=data.dtype, + crs="+proj=latlong", + transform=rasterio.transform.Affine.scale(0.1, 0.1), + ) as dst: + dst.write(d, 1) def get_zip_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) From 4f8948048cecb55b638feb1e0104de5a44d93566 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 14:49:56 +0100 Subject: [PATCH 32/74] preprocessing: fix boolean argument handling Add is_flag=True to the click definitions of the two boolean command-line arguments to conform to CWL and normal *nix behaviour (i.e. flag is now controlled by the argument's presence/absence, not by an additional parameter to the argument). --- ost/app/preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index b7251242..a381aa79 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -22,7 +22,7 @@ type=click.Choice(["OST_GTC", "OST-RTC", "CEOS", "Earth-Engine"]), default="Earth-Engine", ) -@click.option("--with-speckle-filter", default=False) +@click.option("--with-speckle-filter", is_flag=True, default=False) @click.option( "--resampling-method", type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), @@ -31,7 +31,7 @@ @click.option("--cdse-user", default="dummy") @click.option("--cdse-password", default="dummy") @click.option( - "--dry-run", default=False, + "--dry-run", is_flag=True, default=False, help="Skip processing and write a placeholder output file instead. " "Useful for testing." 
) From ba545ba1bf7fdea8a53ca286d348226ac3bcffea Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 15:09:57 +0100 Subject: [PATCH 33/74] preprocessing: fix a typo --- ost/app/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index a381aa79..c4ee1577 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -180,7 +180,7 @@ def create_dummy_tiff(path: Path) -> None: crs="+proj=latlong", transform=rasterio.transform.Affine.scale(0.1, 0.1), ) as dst: - dst.write(d, 1) + dst.write(data, 1) def get_zip_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) From 18cb5123ef98b011372b89df78a117fc0a3c65ab Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 18:13:43 +0100 Subject: [PATCH 34/74] Several improvements to preprocessing.py - Make asset hrefs relative in output STAC. - Set bbox, geometry, and start and end times correctly in output STAC. - Correct some errors in log messages. --- ost/app/preprocessing.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index c4ee1577..bac8f22a 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -10,6 +10,7 @@ from ost import Sentinel1Scene import click import pystac +import rasterio LOGGER = logging.getLogger(__name__) @@ -145,7 +146,7 @@ def run( if dry_run: tiff_path = output_path / f"{s1.start_date}.tif" - LOGGER.info("Dry run -- creating dummy output at {tiff_path}") + LOGGER.info(f"Dry run -- creating dummy output at {tiff_path}") create_dummy_tiff(tiff_path) else: LOGGER.info(f"Creating ARD at {output_path}") @@ -161,7 +162,7 @@ def run( # Write a STAC catalog and item pointing to the output product. 
LOGGER.info("Writing STAC catalogue and item") - write_stac_for_tiff(".", str(tiff_path)) + write_stac_for_tiff(".", str(tiff_path), scene_id) def create_dummy_tiff(path: Path) -> None: @@ -201,20 +202,32 @@ def get_zip_from_stac(stac_root: str) -> str: return str(zip_path) -def write_stac_for_tiff(stac_root: str, asset_path: str) -> None: +def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str) -> None: + ds = rasterio.open(asset_path) asset = pystac.Asset( roles=["data"], href=asset_path, media_type="image/tiff; application=geotiff;", ) + bb = ds.bounds + s = scene_id item = pystac.Item( id="result-item", - # TODO use actual geometry and datetime - geometry=None, - bbox=None, - datetime=datetime.fromisoformat("2000-01-01T00:00:00+00:00"), - properties={}, # datetime will be filled in automatically - assets={"DIMAP": asset}, + geometry=[ + [bb.left, bb.bottom], + [bb.left, bb.top], + [bb.right, bb.top], + [bb.right, bb.bottom], + [bb.left, bb.bottom] + ], + bbox=[bb.left, bb.bottom, bb.right, bb.top], + datetime=None, + start_datetime=datetime(*map(int, ( + s[17:21], s[21:23], s[23:25], s[26:28], s[28:30], s[30:32]))), + end_datetime=datetime(*map(int, ( + s[33:37], s[37:39], s[39:41], s[42:44], s[44:46], s[46:48]))), + properties={}, # datetime values will be filled in automatically + assets={"TIFF": asset}, ) catalog = pystac.Catalog( id="catalog", @@ -222,6 +235,7 @@ def write_stac_for_tiff(stac_root: str, asset_path: str) -> None: href=f"{stac_root}/catalog.json", ) catalog.add_item(item) + catalog.make_all_asset_hrefs_relative() catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) From d87dfa40429f0d37ca82f7c399c79e4203c03ec9 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 5 Nov 2024 18:36:19 +0100 Subject: [PATCH 35/74] preprocessing: explicitly specify output STAC root --- ost/app/preprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 
bac8f22a..48d4acab 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -162,7 +162,7 @@ def run( # Write a STAC catalog and item pointing to the output product. LOGGER.info("Writing STAC catalogue and item") - write_stac_for_tiff(".", str(tiff_path), scene_id) + write_stac_for_tiff(str(output_path), str(tiff_path), scene_id) def create_dummy_tiff(path: Path) -> None: @@ -203,6 +203,7 @@ def get_zip_from_stac(stac_root: str) -> str: def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str) -> None: + LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) asset = pystac.Asset( roles=["data"], From 05345c68b6599914c954ddc85995cbd4b9b37088 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 6 Nov 2024 13:09:13 +0100 Subject: [PATCH 36/74] Updates to Dockerfile and context - Use custom headers when fetching OTB package to avoid download speed throttling. - Install OST package from version2 branch of bcdev repository. - Add constraints.txt and snap.varfile to build context. - Fetch last commit information via GitHub API to invalidate build cache if package has been updated. --- resources/Dockerfile | 13 ++++++++++--- resources/constraints.txt | 23 +++++++++++++++++++++++ resources/snap.varfile | 17 +++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 resources/constraints.txt create mode 100644 resources/snap.varfile diff --git a/resources/Dockerfile b/resources/Dockerfile index 5f3b0944..d13883a9 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -42,6 +42,8 @@ RUN groupadd -r ost && \ nodejs \ npm +# Install OTB. Use some custom headers when fetching package, since otherwise +# the download speed is heavily throttled. 
RUN alias python=python3 && \ rm -rf /var/lib/apt/lists/* && \ python3 -m pip install jupyterlab && \ @@ -52,7 +54,9 @@ RUN alias python=python3 && \ rm $TBX && \ rm snap.varfile && \ cd /home/ost/programs && \ - wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} && \ + wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} \ + --referer="https://www.orfeo-toolbox.org/packages/archives/OTB/" \ + --user-agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0" && \ chmod +x $OTB && \ ./${OTB} && \ rm -f OTB-${OTB_VERSION}-Linux64.run @@ -68,8 +72,11 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME -# get OST and tutorials -RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git -c constraints.txt +# Invalidate Docker cache if there have been new commits to the repository +ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=version2&per_page=1" last_commit + +# Install OST and tutorials +RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git@version2 -c constraints.txt #RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager #RUN jupyter nbextension enable --py widgetsnbextension diff --git a/resources/constraints.txt b/resources/constraints.txt new file mode 100644 index 00000000..cfe3d577 --- /dev/null +++ b/resources/constraints.txt @@ -0,0 +1,23 @@ +click>=8.0 +descartes==1.1.0 +fiona==1.9.6 +gdal==3.0.4 +godale==0.3 +pyproj==3.5.0 +geopandas==0.13.2 +#jupyterlab==4.2.3 +matplotlib==3.7.5 +numpy==1.24.4 +pandas==1.5.3 +psycopg2-binary==2.9.9 +rasterio==1.3.10 +requests==2.32.3 +scipy==1.10.1 +shapely==2.0.5 +tqdm==4.66.4 +imageio==2.34.2 +rtree==1.3.0 +retrying==1.3.3 +pytest==8.2.2 +pytest-cov +pytest-runner diff --git a/resources/snap.varfile b/resources/snap.varfile new file mode 100644 index 00000000..fd6688cc --- /dev/null +++ b/resources/snap.varfile @@ -0,0 +1,17 @@ +# install4j response file for 
ESA SNAP 8.0 +# headless S1TBX +deleteAllSnapEngineDir$Boolean=false +deleteOnlySnapDesktopDir$Boolean=true +executeLauncherWithPythonAction$Boolean=false +forcePython$Boolean=false +pythonExecutable=/usr/bin/python +sys.adminRights$Boolean=true +sys.component.RSTB$Boolean=true +sys.component.S1TBX$Boolean=true +sys.component.S2TBX$Boolean=false +sys.component.S3TBX$Boolean=false +sys.component.SNAP$Boolean=false +sys.installationDir=/home/ost/programs/snap +sys.languageId=en +sys.programGroupDisabled$Boolean=false +sys.symlinkDir=/usr/local/bin From 060e3f62d2f1523efc06e603c862f826355c67f0 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 6 Nov 2024 13:22:28 +0100 Subject: [PATCH 37/74] Updates to CWL file - Add a dry-run parameter. - Update Docker requirement to reference public image on quay.io. --- resources/opensar.cwl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/resources/opensar.cwl b/resources/opensar.cwl index d826e255..40ba6a17 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -48,6 +48,10 @@ $graph: type: string label: CDSE password doc: Password for the specified CDSE user + dry-run: + type: boolean + label: Dry run + doc: Skip processing and write a placeholder output file instead outputs: - id: stac_catalog @@ -66,6 +70,7 @@ $graph: resampling-method: resampling-method cdse-user: cdse-user cdse-password: cdse-password + dry-run: dry-run out: - ost_ard @@ -73,7 +78,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: ost:v1 + dockerPull: quay.io/bcdev/opensartoolkit:latest baseCommand: - python3 @@ -118,6 +123,10 @@ $graph: type: string inputBinding: prefix: --cdse-password + dry-run: + type: boolean + inputBinding: + prefix: --dry-run outputs: ost_ard: From 829ed8678bb4416372d9164e7d6e876d42a184c7 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 6 Nov 2024 14:14:22 +0100 Subject: [PATCH 38/74] Dockerfile: improve wget progress for OTB download wget progress now 
uses the dot:giga setting to reduce the amount of noise in the build logs. --- resources/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/resources/Dockerfile b/resources/Dockerfile index d13883a9..22fc764a 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -55,6 +55,7 @@ RUN alias python=python3 && \ rm snap.varfile && \ cd /home/ost/programs && \ wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} \ + --progress=dot:giga \ --referer="https://www.orfeo-toolbox.org/packages/archives/OTB/" \ --user-agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0" && \ chmod +x $OTB && \ From 9dc97163ea97f2387dfdd59d09b6c03ebc20bd4d Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 6 Nov 2024 15:24:02 +0100 Subject: [PATCH 39/74] Add an example for Application Package execution --- examples/application-package/README | 9 +++++++ ...16_20221004T164341_045295_056A44_13CB.json | 24 +++++++++++++++++++ .../SAR/GRD/2022/10/04/catalog.json | 13 ++++++++++ examples/application-package/inputs.yaml | 11 +++++++++ 4 files changed, 57 insertions(+) create mode 100644 examples/application-package/README create mode 100644 examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json create mode 100644 examples/application-package/SAR/GRD/2022/10/04/catalog.json create mode 100644 examples/application-package/inputs.yaml diff --git a/examples/application-package/README b/examples/application-package/README new file mode 100644 index 00000000..e970ab2a --- /dev/null +++ b/examples/application-package/README @@ -0,0 +1,9 @@ +The contents of this directory support testing of the OpenSarToolkit +CWL Workflow as an OGC EO Application Package. The JSON files +(which in a real deployment would be generated by the EOAP platform's +data stage-in process) provide a STAC catalogue for a specified input +item. 
The file inputs.yaml specifies the parameters for the workflow. +After setting the parameters appropriately in inputs.yaml, the workflow +can be executed in the manner of an Application Package by running + +cwltool opensar.cwl inputs.yaml diff --git a/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json b/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json new file mode 100644 index 00000000..299f090f --- /dev/null +++ b/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json @@ -0,0 +1,24 @@ +{ + "stac_version": "1.1.0", + "type": "Feature", + "id": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + "geometry": null, + "properties": { + "datetime": "2022-10-04T16:43:16Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1" + }, + "assets": { + "GRD": { + "type": "application/zip", + "roles": [ "data" ], + "href": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.zip" + } + }, + "links": [ + { + "rel": "parent", + "href": "../catalog.json" + } + ] +} diff --git a/examples/application-package/SAR/GRD/2022/10/04/catalog.json b/examples/application-package/SAR/GRD/2022/10/04/catalog.json new file mode 100644 index 00000000..bfae5594 --- /dev/null +++ b/examples/application-package/SAR/GRD/2022/10/04/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json" + } + ] +} diff --git a/examples/application-package/inputs.yaml b/examples/application-package/inputs.yaml new file mode 100644 index 00000000..cac17449 --- /dev/null +++ b/examples/application-package/inputs.yaml @@ -0,0 +1,11 @@ +--- +input: + 
class: Directory + path: /data/opensar/SAR/GRD/2022/10/04 +resolution: 100 +ard-type: Earth-Engine +with-speckle-filter: false +resampling-method: BILINEAR_INTERPOLATION +cdse-user: +cdse-password: +dry-run: true From e5bda678383864e97fc8963d67a55ae9f1aadd74 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 6 Nov 2024 18:19:09 +0100 Subject: [PATCH 40/74] Dockerfile: build from bcdev repo default branch version2 branch has now been merged so we build images from main instead. --- resources/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 22fc764a..725279bc 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -77,7 +77,7 @@ COPY constraints.txt $HOME ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=version2&per_page=1" last_commit # Install OST and tutorials -RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git@version2 -c constraints.txt +RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git -c constraints.txt #RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager #RUN jupyter nbextension enable --py widgetsnbextension From e4ddfb3bd47b95bc0c740610eef143e31e143b24 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 12 Dec 2024 13:46:23 +0100 Subject: [PATCH 41/74] Start adding support for non-zipped input get_zip_from_stac has been renamed to get_input_path_from_stac and now also handles STAC catalogues describing unzipped SAFE directories. However, preprocessing.run() still needs to be adapted to deal with these SAFE directories as input. 
--- ost/app/preprocessing.py | 50 ++- ...241113T170632_056539_06EEA8_B145.SAFE.json | 376 ++++++++++++++++++ tests/resources/input_dir/catalog.json | 13 + ...16_20221004T164341_045295_056A44_13CB.json | 24 ++ tests/resources/input_zip/catalog.json | 13 + tests/test_preprocessing.py | 23 ++ 6 files changed, 486 insertions(+), 13 deletions(-) create mode 100644 tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json create mode 100644 tests/resources/input_dir/catalog.json create mode 100644 tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json create mode 100644 tests/resources/input_zip/catalog.json create mode 100644 tests/test_preprocessing.py diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 48d4acab..e4e6f23a 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -79,8 +79,10 @@ def run( output_path = Path(output_dir) # We expect input to be the path to a directory containing a STAC catalog - # containing an item which links to the input zip as an asset. 
- input_path = get_zip_from_stac(input) + # containing an item which contains an asset for either a zip file + # (zipped SAFE archive) or a SAFE manifest (which is used to determine + # the location of a non-zipped SAFE directory) + input_path = get_input_path_from_stac(input) scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] year = scene_id[17:21] @@ -183,23 +185,45 @@ def create_dummy_tiff(path: Path) -> None: ) as dst: dst.write(data, 1) -def get_zip_from_stac(stac_root: str) -> str: +def get_input_path_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) item_links = [link for link in catalog.links if link.rel == "item"] assert len(item_links) == 1 item_link = item_links[0] item = pystac.Item.from_file(str(stac_path / item_link.href)) - zip_assets = [ - asset - for asset in item.assets.values() - if asset.media_type == "application/zip" - ] - assert len(zip_assets) == 1 - zip_asset = zip_assets[0] - zip_path = stac_path / zip_asset.href - LOGGER.info(f"Found input zip at {zip_path}") - return str(zip_path) + if "manifest" in item.assets: + LOGGER.info(f"Found manifest asset in {catalog}") + manifest_asset = item.assets["manifest"] + if "filename" in manifest_asset.extra_fields: + filename = pathlib.Path(manifest_asset.extra_fields["filename"]) + safe_dir = stac_path / filename.parent + LOGGER.info(f"Found SAFE directory at {safe_dir}") + return str(safe_dir) + else: + raise RuntimeError( + f"No filename for manifest asset in {catalog}" + ) + else: + LOGGER.info("No manifest asset found; looking for zip asset") + zip_assets = [ + asset + for asset in item.assets.values() + if asset.media_type == "application/zip" + ] + if len(zip_assets) < 1: + raise RuntimeError( + f"No manifest assets or zip assets found in {catalog}" + ) + elif len(zip_assets) > 1: + raise RuntimeError( + f"No manifest assets and multiple zip assets found in " + f"{stac_root}, so it's 
not clear which zip asset to use." + ) + else: + zip_path = stac_path / zip_assets[0].href + LOGGER.info(f"Found input zip at {zip_path}") + return str(zip_path) def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str) -> None: diff --git a/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json b/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json new file mode 100644 index 00000000..d0b78ea5 --- /dev/null +++ b/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json @@ -0,0 +1,376 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/processing/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/sar/v1.0.0/schema.json", + "https://stac-extensions.github.io/sat/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 10.710614, + 42.433846 + ], + [ + 11.100653, + 40.9328 + ], + [ + 14.189954, + 41.339962 + ], + [ + 13.873197, + 42.840092 + ], + [ + 10.710614, + 42.433846 + ] + ] + ] + }, + "properties": { + "datetime": "2024-11-13T17:06:07.293807Z", + "start_datetime": "2024-11-13T17:06:07.293807Z", + "end_datetime": "2024-11-13T17:06:32.292309Z", + "created": "2024-11-13T17:44:24Z", + "updated": "2024-12-06T11:59:58.0279394Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1", + "mission": "sentinel-1", + "instruments": [ + "c-sar" + ], + "sensor_type": "radar", + "gsd": 22.0, + "sat:orbit_state": "ascending", + "sat:anx_datetime": "2024-11-13T16:55:00.601119Z", + "sat:absolute_orbit": 56539, + "sat:relative_orbit": 117, + "sat:platform_international_designator": "2014-016A", + "processing:level": "L1", + "processing:lineage": "GRD Post Processing", 
+ "processing:facility": "Copernicus Ground Segment", + "processing:software": { + "Sentinel-1 IPF": "003.80" + }, + "proj:epsg": null, + "title": "SENTINEL-1A GRD VV/VH 117 2024-11-13 17:06:07", + "sar:instrument_mode": "IW", + "sar:frequency_band": "C", + "sar:polarizations": [ + "VV", + "VH" + ], + "sar:product_type": "GRD", + "providers": [ + { + "name": "ESA/EC (Copernicus)", + "description": "The Sentinel-1 mission comprises a constellation of two polar-orbiting satellites, operating day and night performing C-band synthetic aperture radar imaging, enabling them to acquire imagery regardless of the weather.", + "roles": [ + "producer", + "processor", + "licensor" + ], + "url": "https://sentinel.esa.int/web/sentinel/missions/sentinel-1" + } + ] + }, + "bbox": [ + 10.710614, + 40.9328, + 14.189954, + 42.840092 + ], + "assets": { + "amplitude-vh-iw-002": { + "type": "image/x.geotiff", + "roles": [ + "amplitude", + "data" + ], + "title": "IW VH Amplitude pixel values", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.tiff", + "file:size": 878507832, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.tiff", + "sar:polarizations": [ + "VH" + ], + "proj:epsg": null, + "proj:shape": [ + 26329, + 16678 + ] + }, + "annotation-vh-iw-002": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "Annotation VH IW 002", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 1842824, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "amplitude-vv-iw-001": { + 
"type": "image/x.geotiff", + "roles": [ + "amplitude", + "data" + ], + "title": "IW VV Amplitude pixel values", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.tiff", + "file:size": 878507832, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.tiff", + "sar:polarizations": [ + "VV" + ], + "proj:epsg": null, + "proj:shape": [ + 26329, + 16678 + ] + }, + "annotation-vv-iw-001": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "Annotation VV IW 001", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 1842846, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "calibration-vh-iw-002": { + "type": "text/xml", + "roles": [ + "calibration", + "data" + ], + "title": "Calibration VH IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 1035193, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "calibration-vv-iw-001": { + "type": "text/xml", + "roles": [ + "calibration", + "data" + ], + "title": "Calibration VV IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 
1035193, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "noise-vh-iw-002": { + "type": "text/xml", + "roles": [ + "data", + "noise" + ], + "title": "Noise VH IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 432157, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "noise-vv-iw-001": { + "type": "text/xml", + "roles": [ + "data", + "noise" + ], + "title": "Noise VV IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 432157, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "support-s1-level-1-calibration": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-calibration", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-calibration.xsd", + "file:size": 6427, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-calibration.xsd" + }, + "support-s1-level-1-measurement": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-measurement", + "href": 
"S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-measurement.xsd", + "file:size": 471, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-measurement.xsd" + }, + "support-s1-level-1-noise": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-noise", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-noise.xsd", + "file:size": 7290, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-noise.xsd" + }, + "support-s1-level-1-product": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-product", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-product.xsd", + "file:size": 149999, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-product.xsd" + }, + "support-s1-level-1-quicklook": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-quicklook", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-quicklook.xsd", + "file:size": 469, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-quicklook.xsd" + }, + "support-s1-level-1-rfi": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-rfi", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-rfi.xsd", + "file:size": 16595, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-rfi.xsd" + }, + "support-s1-map-overlay": { + "type": "text/xml", + "roles": [ + "metadata", + 
"support" + ], + "title": "Support file support-s1-map-overlay", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-map-overlay.xsd", + "file:size": 450, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-map-overlay.xsd" + }, + "support-s1-object-types": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-object-types", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-object-types.xsd", + "file:size": 62179, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-object-types.xsd" + }, + "support-s1-product-preview": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-product-preview", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-product-preview.xsd", + "file:size": 440, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-product-preview.xsd" + }, + "preview-logo": { + "type": "image/png", + "roles": [ + "data", + "logo" + ], + "title": "Preview file preview-logo", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/icons/logo.png", + "file:size": 95280, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/icons/logo.png" + }, + "preview-map-overlay": { + "type": "application/vnd.google-earth.kml+xml", + "roles": [ + "data", + "kml" + ], + "title": "Preview file preview-map-overlay", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/map-overlay.kml", + "file:size": 1018, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/map-overlay.kml" + }, + "preview-product-preview": { + "type": "application/octet-stream", + "roles": [ 
+ "data" + ], + "title": "Preview file preview-product-preview", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/product-preview.html", + "file:size": 3673, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/product-preview.html" + }, + "preview-quick-look": { + "type": "image/png", + "roles": [ + "data", + "thumbnail" + ], + "title": "Preview file preview-quick-look", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/quick-look.png", + "file:size": 301151, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/quick-look.png" + }, + "preview-thumbnail": { + "type": "application/octet-stream", + "roles": [ + "data" + ], + "title": "Preview file preview-thumbnail", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/thumbnail.png", + "file:size": 100892, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/thumbnail.png" + }, + "manifest": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "SAFE Manifest", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/manifest.safe", + "file:size": 24532, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/manifest.safe" + } + }, + "links": [] +} \ No newline at end of file diff --git a/tests/resources/input_dir/catalog.json b/tests/resources/input_dir/catalog.json new file mode 100644 index 00000000..611b8637 --- /dev/null +++ b/tests/resources/input_dir/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json" + } + ] +} diff --git 
a/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json b/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json new file mode 100644 index 00000000..299f090f --- /dev/null +++ b/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json @@ -0,0 +1,24 @@ +{ + "stac_version": "1.1.0", + "type": "Feature", + "id": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + "geometry": null, + "properties": { + "datetime": "2022-10-04T16:43:16Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1" + }, + "assets": { + "GRD": { + "type": "application/zip", + "roles": [ "data" ], + "href": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.zip" + } + }, + "links": [ + { + "rel": "parent", + "href": "../catalog.json" + } + ] +} diff --git a/tests/resources/input_zip/catalog.json b/tests/resources/input_zip/catalog.json new file mode 100644 index 00000000..bfae5594 --- /dev/null +++ b/tests/resources/input_zip/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json" + } + ] +} diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 00000000..b9ab5675 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,23 @@ +import os +from pathlib import Path + +# OST insists on knowing the path to gpt, but we don't need it for these tests. 
+os.environ["GPT_PATH"] = os.environ.get("GPT_PATH", "dummy") + +from ost.app import preprocessing + + +def test_get_input_path_from_stac_zip(): + cat_path = Path(__file__).parent / "resources" / "input_zip" + assert preprocessing.get_input_path_from_stac(str(cat_path)) == \ + str(cat_path / + "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_" + "045295_056A44_13CB.zip") + + +def test_get_input_path_from_stac_dir(): + cat_path = Path(__file__).parent / "resources" / "input_dir" + assert preprocessing.get_input_path_from_stac(str(cat_path)) == \ + str(cat_path / + "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_" + "056539_06EEA8_B145.SAFE") From 74b79c52e44aa297b793d08e8b082da1cd6e9f2b Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 13 Dec 2024 10:47:54 +0100 Subject: [PATCH 42/74] Preprocessor: minor refactoring --- ost/app/preprocessing.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index e4e6f23a..9fdbb5cf 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -81,31 +81,26 @@ def run( # We expect input to be the path to a directory containing a STAC catalog # containing an item which contains an asset for either a zip file # (zipped SAFE archive) or a SAFE manifest (which is used to determine - # the location of a non-zipped SAFE directory) + # the location of a non-zipped SAFE directory). 
The returned path is + # either the zip file or the SAFE directory input_path = get_input_path_from_stac(input) scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] year = scene_id[17:21] month = scene_id[21:23] day = scene_id[23:25] - os.makedirs(f"{output_dir}/SAR/GRD/{year}/{month}/{day}", exist_ok=True) + + output_subdir = f"{output_dir}/SAR/GRD/{year}/{month}/{day}" + os.makedirs(output_subdir, exist_ok=True) try: + scene_path = f"{output_subdir}/{scene_id}" try: - os.link( - input_path, - f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", - ) + os.link(input_path, f"{scene_path}.zip") except OSError as e: LOGGER.warning("Exception linking input data", exc_info=e) LOGGER.warning("Attempting to copy instead.") - shutil.copy2( - input_path, - f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.zip", - ) - with open( - f"{output_dir}/SAR/GRD/{year}/{month}/{day}/{scene_id}.downloaded", - mode="w", - ) as f: + shutil.copy2(input_path, f"{scene_path}.zip") + with open(f"{scene_path}.downloaded", mode="w") as f: f.write("successfully found here") except Exception as e: LOGGER.warning("Exception linking input data", exc_info=e) From d794102d00598f1c4fac518340faaeee5884fe43 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 13 Dec 2024 14:59:45 +0100 Subject: [PATCH 43/74] Preprocessor: handle SAFE directory input --- ost/app/preprocessing.py | 70 +++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 9fdbb5cf..3f42df59 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -16,7 +16,7 @@ @click.command() -@click.argument("input") +@click.argument("input_", metavar="input") @click.option("--resolution", default=100) @click.option( "--ard-type", @@ -37,7 +37,7 @@ "Useful for testing." 
) def run( - input: str, + input_: str, resolution: int, ard_type: str, with_speckle_filter: bool, @@ -83,32 +83,24 @@ def run( # (zipped SAFE archive) or a SAFE manifest (which is used to determine # the location of a non-zipped SAFE directory). The returned path is # either the zip file or the SAFE directory - input_path = get_input_path_from_stac(input) + input_path = get_input_path_from_stac(input_) - scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] - year = scene_id[17:21] - month = scene_id[21:23] - day = scene_id[23:25] + # We assume that any file input path is a zip, and any non-file input + # path is a SAFE directory. + zip_input = pathlib.Path(input_path).is_file() + LOGGER.info(f"Input is {'zip' if zip_input else 'SAFE directory'}") - output_subdir = f"{output_dir}/SAR/GRD/{year}/{month}/{day}" - os.makedirs(output_subdir, exist_ok=True) - try: - scene_path = f"{output_subdir}/{scene_id}" - try: - os.link(input_path, f"{scene_path}.zip") - except OSError as e: - LOGGER.warning("Exception linking input data", exc_info=e) - LOGGER.warning("Attempting to copy instead.") - shutil.copy2(input_path, f"{scene_path}.zip") - with open(f"{scene_path}.downloaded", mode="w") as f: - f.write("successfully found here") - except Exception as e: - LOGGER.warning("Exception linking input data", exc_info=e) + scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] + if zip_input: + copy_zip_input(input_path, output_dir, scene_id) # Instantiate a Sentinel1Scene from the specified scene identifier s1 = Sentinel1Scene(scene_id) s1.info() # write scene summary information to stdout - s1.download(output_path, mirror="5", uname=cdse_user, pword=cdse_password) + if zip_input: + s1.download( + output_path, mirror="5", uname=cdse_user, pword=cdse_password + ) single_ard = s1.ard_parameters["single_ARD"] # Set ARD type. 
Choices: "OST_GTC", "OST-RTC", "CEOS", "Earth Engine" @@ -147,10 +139,16 @@ def run( create_dummy_tiff(tiff_path) else: LOGGER.info(f"Creating ARD at {output_path}") - # This seems to be a prerequisite for create_rgb. - s1.create_ard( - infile=s1.get_path(output_path), out_dir=output_path, overwrite=True - ) + # create_ard seems to be a prerequisite for create_rgb. + if zip_input: + s1.create_ard( + infile=s1.get_path(output_path), out_dir=output_path, overwrite=True + ) + else: + s1.create_ard( + infile=input_path, out_dir=output_path, overwrite=True + ) + LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") LOGGER.info(f"Creating RGB at {output_path}") s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) @@ -162,6 +160,26 @@ def run( write_stac_for_tiff(str(output_path), str(tiff_path), scene_id) +def copy_zip_input(input_path, output_dir, scene_id): + year = scene_id[17:21] + month = scene_id[21:23] + day = scene_id[23:25] + output_subdir = f"{output_dir}/SAR/GRD/{year}/{month}/{day}" + os.makedirs(output_subdir, exist_ok=True) + try: + scene_path = f"{output_subdir}/{scene_id}" + try: + os.link(input_path, f"{scene_path}.zip") + except OSError as e: + LOGGER.warning("Exception linking input data", exc_info=e) + LOGGER.warning("Attempting to copy instead.") + shutil.copy2(input_path, f"{scene_path}.zip") + with open(f"{scene_path}.downloaded", mode="w") as f: + f.write("successfully found here") + except Exception as e: + LOGGER.warning("Exception linking/copying input data", exc_info=e) + + def create_dummy_tiff(path: Path) -> None: import numpy as np import rasterio From e92c93ec9136acc3c2b57a2e26044086bf6ab010 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Mon, 16 Dec 2024 16:45:13 +0100 Subject: [PATCH 44/74] Preprocessor: add a logging message --- ost/s1/s1scene.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ost/s1/s1scene.py b/ost/s1/s1scene.py index 630d7ab7..e233980f 100644 --- a/ost/s1/s1scene.py +++ 
b/ost/s1/s1scene.py
@@ -657,6 +657,7 @@ def safe_annotation_get(self, download_dir, data_mount=None):
         return gdf_final.drop_duplicates(["AnxTime"], keep="first")

     def copernicus_uuid(self, opener):
+        logger.info("Getting Copernicus UUID")
         # construct the basic the url
         base_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="

From 74bbec417ff07e4b0b8482c946c3956c079bec84 Mon Sep 17 00:00:00 2001
From: Pontus Lurcock
Date: Tue, 17 Dec 2024 15:23:36 +0100
Subject: [PATCH 45/74] Add more logging to s1scene

---
 ost/s1/s1scene.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ost/s1/s1scene.py b/ost/s1/s1scene.py
index e233980f..c9717834 100644
--- a/ost/s1/s1scene.py
+++ b/ost/s1/s1scene.py
@@ -668,6 +668,8 @@ def copernicus_uuid(self, opener):
         # construct the download url
         url = base_url + action

+        logger.info("Fetching " + url)
+
         try:
             # get the request
             req = opener.open(url)

From fa31ffcf9bc0526eac33fc3d6fad45b4dc732ce9 Mon Sep 17 00:00:00 2001
From: Pontus Lurcock
Date: Wed, 18 Dec 2024 15:55:02 +0100
Subject: [PATCH 46/74] Some updates to CWL file

- Update to version 1.2 to allow use of loadListing and networkAccess
  options.
- Set "loadListing: no_listing" for directory input to prevent errors
  due to illegally named files in the input directory.
- Set "networkAccess: true" in CommandLineTool requirements to ensure
  that s1scene can make requests to external services.
--- resources/opensar.cwl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 40ba6a17..5b631576 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -1,4 +1,4 @@ -cwlVersion: v1.0 +cwlVersion: v1.2 $namespaces: s: https://schema.org/ s:softwareVersion: 1.0.0 @@ -14,6 +14,7 @@ $graph: input: type: Directory label: Input S1 GRD + loadListing: no_listing resolution: type: int label: Resolution @@ -79,6 +80,8 @@ $graph: requirements: DockerRequirement: dockerPull: quay.io/bcdev/opensartoolkit:latest + NetworkAccess: + networkAccess: true baseCommand: - python3 From 6dc44d8bb3616c5963531b654dad2754e8d4c76b Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 18 Dec 2024 15:56:12 +0100 Subject: [PATCH 47/74] Dockerfile: add ost_branch argument This argument controls the branch or commit of the OpenSarToolkit repository which is fetched, making it easier to build locally from a different branch. --- resources/Dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 725279bc..53998a90 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -73,11 +73,13 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME +ARG ost_branch=main + # Invalidate Docker cache if there have been new commits to the repository -ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=version2&per_page=1" last_commit +ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit # Install OST and tutorials -RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git -c constraints.txt +RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git@${ost_branch} -c constraints.txt #RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager #RUN jupyter nbextension enable --py widgetsnbextension From 
a672a65dd05b76e42a4405bd3afe1087eedeac2c Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 3 Jan 2025 17:28:44 +0100 Subject: [PATCH 48/74] CWL file: set Docker image tag to "version3" --- resources/opensar.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 40ba6a17..29ef4dc2 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -78,7 +78,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:latest + dockerPull: quay.io/bcdev/opensartoolkit:version3 baseCommand: - python3 From 8ae2cb381c3e800c5eb6b8965be06c707a587381 Mon Sep 17 00:00:00 2001 From: simonevaccari Date: Wed, 15 Jan 2025 16:46:08 +0000 Subject: [PATCH 49/74] proposed changes to CWL - remove cdse and rename workflow ID --- resources/opensar.cwl | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 91f8fd6d..1f5d1cf7 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -8,7 +8,7 @@ $graph: - class: Workflow label: OST Notebook 1 doc: Preprocessing an S1 image with OST - id: main + id: opensartoolkit requirements: [] inputs: input: @@ -41,14 +41,6 @@ $graph: - BICUBIC_INTERPOLATION label: Resampling method doc: Resampling method to use - cdse-user: - type: string - label: CDSE user - doc: CDSE user name - cdse-password: - type: string - label: CDSE password - doc: Password for the specified CDSE user dry-run: type: boolean label: Dry run @@ -69,8 +61,6 @@ $graph: ard-type: ard-type with-speckle-filter: with-speckle-filter resampling-method: resampling-method - cdse-user: cdse-user - cdse-password: cdse-password dry-run: dry-run out: - ost_ard @@ -119,11 +109,11 @@ $graph: inputBinding: prefix: --resampling-method cdse-user: - type: string + type: string? inputBinding: prefix: --cdse-user cdse-password: - type: string + type: string? 
inputBinding: prefix: --cdse-password dry-run: From c802394b7285184b1ba1752055c212ce41bee99a Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 20 Mar 2025 11:54:06 +0100 Subject: [PATCH 50/74] Add jq to packages installed by Dockerfile jq is not needed for OpenSarToolkit itself, but can be useful if the same Docker image is reused for JSON manipulation tasks in the EOAP CWL file. --- resources/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 53998a90..85a6936b 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -40,7 +40,8 @@ RUN groupadd -r ost && \ unzip \ imagemagick \ nodejs \ - npm + npm \ + jq # Install OTB. Use some custom headers when fetching package, since otherwise # the download speed is heavily throttled. From ebc2dcc653331a76b2bac1ac898a846dfd7b02f3 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 20 Mar 2025 15:55:26 +0100 Subject: [PATCH 51/74] Dockerfile: install jq in a separate command For reasons that aren't clear, quay.io builds are failing at the apt install line since jq was added. This commit tries installing jq with an additional apt-get invocation instead. --- resources/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 85a6936b..7201112b 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -40,8 +40,9 @@ RUN groupadd -r ost && \ unzip \ imagemagick \ nodejs \ - npm \ - jq + npm + +RUN DEBIAN_FRONTEND=noninteractive apt-get install -yq jq # Install OTB. Use some custom headers when fetching package, since otherwise # the download speed is heavily throttled. 
From 204e266be331d24ae703bd871212dda2fa767308 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 20 Mar 2025 16:29:16 +0100 Subject: [PATCH 52/74] Run apt-get update before installing jq --- resources/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 7201112b..997eb74f 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -42,7 +42,7 @@ RUN groupadd -r ost && \ nodejs \ npm -RUN DEBIAN_FRONTEND=noninteractive apt-get install -yq jq +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq jq # Install OTB. Use some custom headers when fetching package, since otherwise # the download speed is heavily throttled. From f5b29a042bbbf04fc44ba3e7f84647f9c199a635 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 20 Mar 2025 17:47:34 +0100 Subject: [PATCH 53/74] Update CWL docker requirement to version 4 --- resources/opensar.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 1f5d1cf7..41a388f8 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -69,7 +69,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:version3 + dockerPull: quay.io/bcdev/opensartoolkit:version4 NetworkAccess: networkAccess: true From cc03eb8c8eebc9cc764716e0f6f4a3bf329e0d01 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 10:35:48 +0200 Subject: [PATCH 54/74] Preprocessor: correct geometry in STAC; reformat - Update the geometry entry in the STAC output to be a dictionary including a type key rather than just a list of co-ordinates. - Improve source code formatting. 
--- ost/app/preprocessing.py | 72 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 3f42df59..47c71778 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -32,9 +32,11 @@ @click.option("--cdse-user", default="dummy") @click.option("--cdse-password", default="dummy") @click.option( - "--dry-run", is_flag=True, default=False, + "--dry-run", + is_flag=True, + default=False, help="Skip processing and write a placeholder output file instead. " - "Useful for testing." + "Useful for testing.", ) def run( input_: str, @@ -44,7 +46,7 @@ def run( resampling_method: str, cdse_user: str, cdse_password: str, - dry_run: bool + dry_run: bool, ): horizontal_line = "-" * 79 # Used in log output @@ -142,7 +144,9 @@ def run( # create_ard seems to be a prerequisite for create_rgb. if zip_input: s1.create_ard( - infile=s1.get_path(output_path), out_dir=output_path, overwrite=True + infile=s1.get_path(output_path), + out_dir=output_path, + overwrite=True, ) else: s1.create_ard( @@ -186,18 +190,19 @@ def create_dummy_tiff(path: Path) -> None: data = np.linspace(np.arange(100), 50 * np.sin(np.arange(100)), 100) with rasterio.open( - str(path), - 'w', - driver='GTiff', - height=data.shape[0], - width=data.shape[1], - count=1, - dtype=data.dtype, - crs="+proj=latlong", - transform=rasterio.transform.Affine.scale(0.1, 0.1), + str(path), + "w", + driver="GTiff", + height=data.shape[0], + width=data.shape[1], + count=1, + dtype=data.dtype, + crs="+proj=latlong", + transform=rasterio.transform.Affine.scale(0.1, 0.1), ) as dst: dst.write(data, 1) + def get_input_path_from_stac(stac_root: str) -> str: stac_path = pathlib.Path(stac_root) catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) @@ -214,9 +219,7 @@ def get_input_path_from_stac(stac_root: str) -> str: LOGGER.info(f"Found SAFE directory at {safe_dir}") return str(safe_dir) else: - raise 
RuntimeError( - f"No filename for manifest asset in {catalog}" - ) + raise RuntimeError(f"No filename for manifest asset in {catalog}") else: LOGGER.info("No manifest asset found; looking for zip asset") zip_assets = [ @@ -239,7 +242,9 @@ def get_input_path_from_stac(stac_root: str) -> str: return str(zip_path) -def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str) -> None: +def write_stac_for_tiff( + stac_root: str, asset_path: str, scene_id: str +) -> None: LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) asset = pystac.Asset( @@ -251,19 +256,30 @@ def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str) -> None: s = scene_id item = pystac.Item( id="result-item", - geometry=[ - [bb.left, bb.bottom], - [bb.left, bb.top], - [bb.right, bb.top], - [bb.right, bb.bottom], - [bb.left, bb.bottom] - ], + geometry={ + "type": "Polygon", + "coordinates": [ + [bb.left, bb.bottom], + [bb.left, bb.top], + [bb.right, bb.top], + [bb.right, bb.bottom], + [bb.left, bb.bottom], + ], + }, bbox=[bb.left, bb.bottom, bb.right, bb.top], datetime=None, - start_datetime=datetime(*map(int, ( - s[17:21], s[21:23], s[23:25], s[26:28], s[28:30], s[30:32]))), - end_datetime=datetime(*map(int, ( - s[33:37], s[37:39], s[39:41], s[42:44], s[44:46], s[46:48]))), + start_datetime=datetime( + *map( + int, + (s[17:21], s[21:23], s[23:25], s[26:28], s[28:30], s[30:32]), + ) + ), + end_datetime=datetime( + *map( + int, + (s[33:37], s[37:39], s[39:41], s[42:44], s[44:46], s[46:48]), + ) + ), properties={}, # datetime values will be filled in automatically assets={"TIFF": asset}, ) From e17aecd6c515c6c9b231c1b91b27e0fa6b9c051e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 10:46:33 +0200 Subject: [PATCH 55/74] Add "visual" role to output STAC item --- ost/app/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 
47c71778..c3c2bf03 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -248,7 +248,7 @@ def write_stac_for_tiff( LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) asset = pystac.Asset( - roles=["data"], + roles=["data", "visual"], href=asset_path, media_type="image/tiff; application=geotiff;", ) From 96848260d0398b795a799c6f77123ffc76d26010 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 11:39:59 +0200 Subject: [PATCH 56/74] Preprocessor: adjust output location and STAC Output is now written to the result-item subdirectory of the output directory, along with the STAC item itself. The asset link in the STAC item record also points to this new location. --- ost/app/preprocessing.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index c3c2bf03..ac9c5028 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -12,7 +12,9 @@ import pystac import rasterio + LOGGER = logging.getLogger(__name__) +ITEM_ID = "result-item" @click.command() @@ -135,8 +137,10 @@ def run( f"{horizontal_line}" ) + tiff_dir = output_path / ITEM_ID + tiff_dir.mkdir(exist_ok=True) + tiff_path = tiff_dir / f"{s1.start_date}.tif" if dry_run: - tiff_path = output_path / f"{s1.start_date}.tif" LOGGER.info(f"Dry run -- creating dummy output at {tiff_path}") create_dummy_tiff(tiff_path) else: @@ -155,8 +159,7 @@ def run( LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") LOGGER.info(f"Creating RGB at {output_path}") - s1.create_rgb(outfile=output_path.joinpath(f"{s1.start_date}.tif")) - tiff_path = s1.ard_rgb + s1.create_rgb(outfile=tiff_path) LOGGER.info(f"Path to newly created RGB product: {tiff_path}") # Write a STAC catalog and item pointing to the output product. 
@@ -255,7 +258,7 @@ def write_stac_for_tiff( bb = ds.bounds s = scene_id item = pystac.Item( - id="result-item", + id=ITEM_ID, geometry={ "type": "Polygon", "coordinates": [ @@ -267,6 +270,9 @@ def write_stac_for_tiff( ], }, bbox=[bb.left, bb.bottom, bb.right, bb.top], + # Datetime is required by the STAC specification and schema, even + # when there is no reasonable value for it to take. In such cases + # it is permitted to set datetime to null, but not to omit it. datetime=None, start_datetime=datetime( *map( From 0b47e49b498d363b8a492d74aae2e373d7231cc5 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 18:04:11 +0200 Subject: [PATCH 57/74] Add more STAC asset keys; tile dry-run output - Add the "title" and "gsd" keys to the STAC asset output. - Tile the dummy output for dry runs to make it COG-compliant, and increase the output image size to make sure that it's big enough to tile. --- ost/app/preprocessing.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index ac9c5028..2df950ce 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -164,7 +164,7 @@ def run( # Write a STAC catalog and item pointing to the output product. 
LOGGER.info("Writing STAC catalogue and item") - write_stac_for_tiff(str(output_path), str(tiff_path), scene_id) + write_stac_for_tiff(str(output_path), str(tiff_path), scene_id, resolution) def copy_zip_input(input_path, output_dir, scene_id): @@ -191,7 +191,7 @@ def create_dummy_tiff(path: Path) -> None: import numpy as np import rasterio - data = np.linspace(np.arange(100), 50 * np.sin(np.arange(100)), 100) + data = np.linspace(np.arange(2000), 50 * np.sin(np.arange(2000)), 2000) with rasterio.open( str(path), "w", @@ -202,6 +202,7 @@ def create_dummy_tiff(path: Path) -> None: dtype=data.dtype, crs="+proj=latlong", transform=rasterio.transform.Affine.scale(0.1, 0.1), + tiled=True ) as dst: dst.write(data, 1) @@ -245,15 +246,15 @@ def get_input_path_from_stac(stac_root: str) -> str: return str(zip_path) -def write_stac_for_tiff( - stac_root: str, asset_path: str, scene_id: str -) -> None: +def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str, gsd: int) -> None: LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) asset = pystac.Asset( roles=["data", "visual"], href=asset_path, media_type="image/tiff; application=geotiff;", + title="OST-processed", + gsd=gsd ) bb = ds.bounds s = scene_id From 914cd27f923c4619299a763eef4fd66cebbddf4d Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 18:24:43 +0200 Subject: [PATCH 58/74] Fix asset writing bug; improve dry-run dummy image - Fix an error due to incorrect pystac usage when creating the STAC asset. - Improve the appearance of the dummy image produced by a dry run. 
--- ost/app/preprocessing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 2df950ce..728b9668 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -191,7 +191,9 @@ def create_dummy_tiff(path: Path) -> None: import numpy as np import rasterio - data = np.linspace(np.arange(2000), 50 * np.sin(np.arange(2000)), 2000) + data = np.fromfunction( + lambda x, y: x / 2000 + np.sin(y / 50), (2000, 2000) + ) with rasterio.open( str(path), "w", @@ -202,7 +204,7 @@ def create_dummy_tiff(path: Path) -> None: dtype=data.dtype, crs="+proj=latlong", transform=rasterio.transform.Affine.scale(0.1, 0.1), - tiled=True + tiled=True, ) as dst: dst.write(data, 1) @@ -246,7 +248,9 @@ def get_input_path_from_stac(stac_root: str) -> str: return str(zip_path) -def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str, gsd: int) -> None: +def write_stac_for_tiff( + stac_root: str, asset_path: str, scene_id: str, gsd: int +) -> None: LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) asset = pystac.Asset( @@ -254,7 +258,7 @@ def write_stac_for_tiff(stac_root: str, asset_path: str, scene_id: str, gsd: int href=asset_path, media_type="image/tiff; application=geotiff;", title="OST-processed", - gsd=gsd + extra_fields=dict(gsd=gsd), ) bb = ds.bounds s = scene_id From 39f2a79094b309f0db841ece024aed1ee0134258 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 17 Apr 2025 18:34:07 +0200 Subject: [PATCH 59/74] Preprocessor: tile RGB output TIFFs --- ost/s1/grd_to_ard.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ost/s1/grd_to_ard.py b/ost/s1/grd_to_ard.py index 8dfe5d76..241a0c96 100644 --- a/ost/s1/grd_to_ard.py +++ b/ost/s1/grd_to_ard.py @@ -465,7 +465,7 @@ def ard_to_rgb(infile, outfile, driver="GTiff", to_db=True, shrink_factor=1): ratio_array = ras.scale_to_int(ratio_array, 1, 15, "uint8") 
meta.update(dtype="uint8") - with rasterio.open(outfile, "w", **meta) as dst: + with rasterio.open(outfile, "w", tiled=True, **meta) as dst: # write file for k, arr in [(1, co_array), (2, cr_array), (3, ratio_array)]: @@ -504,5 +504,5 @@ def ard_to_rgb(infile, outfile, driver="GTiff", to_db=True, shrink_factor=1): co_array = ras.scale_to_int(co_array, -20, 0, "uint8") meta.update(dtype="uint8") - with rasterio.open(outfile, "w", **meta) as dst: + with rasterio.open(outfile, "w", tiled=True, **meta) as dst: dst.write(co_array) From 1e17ef73b71927fbbea09a3327aee299de7ecd4b Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 25 Apr 2025 15:45:49 +0200 Subject: [PATCH 60/74] Add --wipe-cwd option --- ost/app/preprocessing.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 728b9668..28245845 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -14,6 +14,7 @@ LOGGER = logging.getLogger(__name__) +CATALOG_FILENAME = "catalog.json" ITEM_ID = "result-item" @@ -40,6 +41,14 @@ help="Skip processing and write a placeholder output file instead. " "Useful for testing.", ) +@click.option( + "--wipe-cwd", + is_flag=True, + default=False, + help="After processing, delete everything in the current working directory " + "except for the output data and STAC entries. Dangerous, but can be useful " + "when executing as an application package.", +) def run( input_: str, resolution: int, @@ -49,6 +58,7 @@ def run( cdse_user: str, cdse_password: str, dry_run: bool, + wipe_cwd: bool, ): horizontal_line = "-" * 79 # Used in log output @@ -86,7 +96,7 @@ def run( # containing an item which contains an asset for either a zip file # (zipped SAFE archive) or a SAFE manifest (which is used to determine # the location of a non-zipped SAFE directory). The returned path is - # either the zip file or the SAFE directory + # either the zip file or the SAFE directory. 
input_path = get_input_path_from_stac(input_) # We assume that any file input path is a zip, and any non-file input @@ -165,7 +175,9 @@ def run( # Write a STAC catalog and item pointing to the output product. LOGGER.info("Writing STAC catalogue and item") write_stac_for_tiff(str(output_path), str(tiff_path), scene_id, resolution) - + if wipe_cwd: + LOGGER.info("Removing everything except output from CWD") + delete_cwd_contents() def copy_zip_input(input_path, output_dir, scene_id): year = scene_id[17:21] @@ -297,12 +309,24 @@ def write_stac_for_tiff( catalog = pystac.Catalog( id="catalog", description="Root catalog", - href=f"{stac_root}/catalog.json", + href=f"{stac_root}/{CATALOG_FILENAME}", ) catalog.add_item(item) catalog.make_all_asset_hrefs_relative() catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) +def delete_cwd_contents(): + """Delete everything except the output data and STAC files""" + + cwd = Path.cwd() + for member in cwd.iterdir(): + if member.name not in {CATALOG_FILENAME, ITEM_ID}: + if member.is_dir(): + shutil.rmtree(member) + if member.is_file(): + member.unlink() + + if __name__ == "__main__": sys.exit(run()) From 5456f0b794b13d104019aacaec3b4652735bf2bd Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 25 Apr 2025 18:33:12 +0200 Subject: [PATCH 61/74] Several updates for version 5 - Set HOME in Dockerfile before using it. - In the dockerfile, set the git ref for the OST repository to version5. - Add the --wipe-cwd argument to the CLI tool invocation in the CWL. - Update the Docker image version tag in the CWL to version5. 
--- resources/Dockerfile | 4 +++- resources/opensar.cwl | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 997eb74f..b1defc1d 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -3,6 +3,8 @@ FROM ubuntu:20.04 LABEL maintainer="Andreas Vollrath, FAO" LABEL OpenSARToolkit='0.12.3' +ENV HOME=/home/ost + # set work directory to home and download snap WORKDIR /home/ost @@ -75,7 +77,7 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME -ARG ost_branch=main +ARG ost_branch=version5 # Invalidate Docker cache if there have been new commits to the repository ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 41a388f8..31ec8d36 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -69,14 +69,15 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:version4 + dockerPull: quay.io/bcdev/opensartoolkit:version5 NetworkAccess: networkAccess: true baseCommand: - python3 - /usr/local/lib/python3.8/dist-packages/ost/app/preprocessing.py - arguments: [] + arguments: + - --wipe-cwd inputs: input: type: Directory From bb136cf123590043229b5032615073d0b923ac9e Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 16 May 2025 16:17:36 +0200 Subject: [PATCH 62/74] Preprocessor: fix bug in determining input path With this commit, the preprocessor resolves the path to the manifest asset relative to the STAC item containing that asset, not (as previously and erroneously) relative to the catalog (which worked only when the catalog and item were in the same directory). 
--- ost/app/preprocessing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 28245845..10cff385 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -227,14 +227,18 @@ def get_input_path_from_stac(stac_root: str) -> str: item_links = [link for link in catalog.links if link.rel == "item"] assert len(item_links) == 1 item_link = item_links[0] - item = pystac.Item.from_file(str(stac_path / item_link.href)) + item_path = stac_path / item_link.href + item = pystac.Item.from_file(str(item_path)) if "manifest" in item.assets: - LOGGER.info(f"Found manifest asset in {catalog}") + LOGGER.info(f"Found manifest asset in {str(item_path)}") manifest_asset = item.assets["manifest"] if "filename" in manifest_asset.extra_fields: filename = pathlib.Path(manifest_asset.extra_fields["filename"]) - safe_dir = stac_path / filename.parent - LOGGER.info(f"Found SAFE directory at {safe_dir}") + LOGGER.info(f"Asset path in item: {str(filename)}") + safe_dir = item_path / filename.parent + LOGGER.info(f"Resolved SAFE directory path to {safe_dir}") + assert safe_dir.exists(), "SAFE directory does not exist" + assert safe_dir.is_dir(), "SAFE directory is not a directory" return str(safe_dir) else: raise RuntimeError(f"No filename for manifest asset in {catalog}") From af17cc8e8f2bddd81b8c6b160a7156865c868254 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 16 May 2025 16:54:39 +0200 Subject: [PATCH 63/74] preprocessor: a further input resolution bugfix --- ost/app/preprocessing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 10cff385..aff82d75 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -235,7 +235,10 @@ def get_input_path_from_stac(stac_root: str) -> str: if "filename" in manifest_asset.extra_fields: filename = pathlib.Path(manifest_asset.extra_fields["filename"]) 
LOGGER.info(f"Asset path in item: {str(filename)}") - safe_dir = item_path / filename.parent + # The SAFE directory is the direct parent of the manifest file, + # and we resolve it relative to the parent directory of the STAC + # item. + safe_dir = item_path.parent / filename.parent LOGGER.info(f"Resolved SAFE directory path to {safe_dir}") assert safe_dir.exists(), "SAFE directory does not exist" assert safe_dir.is_dir(), "SAFE directory is not a directory" From 52e4fc6602dd75c752c97918a94bb6993c2662bf Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 30 May 2025 13:28:37 +0200 Subject: [PATCH 64/74] Update Docker and CWL references to version6 --- resources/Dockerfile | 2 +- resources/opensar.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index b1defc1d..b704662f 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -77,7 +77,7 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME -ARG ost_branch=version5 +ARG ost_branch=version6 # Invalidate Docker cache if there have been new commits to the repository ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit diff --git a/resources/opensar.cwl b/resources/opensar.cwl index 31ec8d36..c316fe26 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -69,7 +69,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:version5 + dockerPull: quay.io/bcdev/opensartoolkit:version6 NetworkAccess: networkAccess: true From d5f4d5d3437fbca2d71a5e923a63ad4f4e3da9ca Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Tue, 1 Jul 2025 16:10:56 +0200 Subject: [PATCH 65/74] Fix STAC geometry structure; validate STAC output - Fix the incorrectly structured geometry value in the STAC output (it is now a singleton array of an array of co-ordinate arrays, rather than a mere array of co-ordinate arrays). 
- Validate the catalogue using pystac after writing it. --- ost/app/preprocessing.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index aff82d75..6365b2ed 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -285,13 +285,13 @@ def write_stac_for_tiff( id=ITEM_ID, geometry={ "type": "Polygon", - "coordinates": [ + "coordinates": [[ [bb.left, bb.bottom], [bb.left, bb.top], [bb.right, bb.top], [bb.right, bb.bottom], [bb.left, bb.bottom], - ], + ]], }, bbox=[bb.left, bb.bottom, bb.right, bb.top], # Datetime is required by the STAC specification and schema, even @@ -322,6 +322,11 @@ def write_stac_for_tiff( catalog.make_all_asset_hrefs_relative() catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) + # We validate after saving, so if validation fails the invalid STAC output + # is available for debugging. + n_validated = catalog.validate_all() + LOGGER.info(f"{n_validated} STAC item(s) successfully validated.") + def delete_cwd_contents(): """Delete everything except the output data and STAC files""" From 07da75729ea49d604dca2dc34b0aac39513a76ea Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Wed, 2 Jul 2025 10:58:56 +0200 Subject: [PATCH 66/74] Update Docker and CWL references to version 7 --- resources/Dockerfile | 2 +- resources/opensar.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index b704662f..52d23119 100644 --- a/resources/Dockerfile +++ b/resources/Dockerfile @@ -77,7 +77,7 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME -ARG ost_branch=version6 +ARG ost_branch=version7 # Invalidate Docker cache if there have been new commits to the repository ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit diff --git a/resources/opensar.cwl b/resources/opensar.cwl index c316fe26..c2405f1a 100644 
--- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -69,7 +69,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:version6 + dockerPull: quay.io/bcdev/opensartoolkit:version7 NetworkAccess: networkAccess: true From 3e538cc973ea8a9061b31d7a72800d2c7d2632b4 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 24 Jul 2025 16:40:10 +0200 Subject: [PATCH 67/74] Preprocessing: implement COG output --- ost/app/preprocessing.py | 59 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 6365b2ed..043f8e0c 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -9,8 +9,11 @@ from ost import Sentinel1Scene import click +import numpy as np import pystac import rasterio +from rasterio.enums import Resampling +from rasterio.shutil import copy LOGGER = logging.getLogger(__name__) @@ -149,10 +152,10 @@ def run( tiff_dir = output_path / ITEM_ID tiff_dir.mkdir(exist_ok=True) - tiff_path = tiff_dir / f"{s1.start_date}.tif" + non_cog_tiff_path = tiff_dir / f"{s1.start_date}.tif" if dry_run: - LOGGER.info(f"Dry run -- creating dummy output at {tiff_path}") - create_dummy_tiff(tiff_path) + LOGGER.info(f"Dry run -- creating dummy output at {non_cog_tiff_path}") + create_dummy_tiff(non_cog_tiff_path) else: LOGGER.info(f"Creating ARD at {output_path}") # create_ard seems to be a prerequisite for create_rgb. 
@@ -169,8 +172,18 @@ def run( LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") LOGGER.info(f"Creating RGB at {output_path}") - s1.create_rgb(outfile=tiff_path) - LOGGER.info(f"Path to newly created RGB product: {tiff_path}") + s1.create_rgb(outfile=non_cog_tiff_path) + LOGGER.info(f"Path to newly created RGB product: {non_cog_tiff_path}") + + with rasterio.open(non_cog_tiff_path) as src: + array = src.read() # Read array + profile = src.profile # Get the metadata profile + + tiff_path = pathlib.Path(str(non_cog_tiff_path[:-4]) + '_cog.tif') + transparency_indexes = np.isnan(array) + save_as_cog(array, profile, tiff_path, transparency_indexes, + dtype=profile['dtype']) + LOGGER.info(f"COG file saved: {tiff_path}") # Write a STAC catalog and item pointing to the output product. LOGGER.info("Writing STAC catalogue and item") @@ -340,5 +353,41 @@ def delete_cwd_contents(): member.unlink() +def save_as_cog(result_array: np.ndarray, profile, outfile_name, + transparency_indexes=None, dtype=rasterio.uint8): + """ + Saves an array as a Cloud-Optimized GeoTIFF (COG) using rasterio. 
+ """ + factors = [2, 4, 8, 16, 32, 64] + + if transparency_indexes is not None: + result_array[transparency_indexes] = 0 + + with rasterio.Env(): + profile.update(dtype=dtype, + count=result_array.shape[0], + compress="deflate", + tiled=True, + blockxsize=256, + blockysize=256, + driver='GTiff', + BIGTIFF='IF_NEEDED', + nodata=0) + + temp_file = outfile_name.replace('.tif', '_temp.tif') + + try: + with rasterio.open(temp_file, "w", **profile) as dst: + dst.write(result_array.astype(dtype)) # writes all bands + dst.build_overviews(factors, Resampling.nearest) + dst.update_tags(ns='rio_overview', resampling='nearest') + + copy(temp_file, outfile_name, copy_src_overviews=True, driver='COG', + compress="deflate") + finally: + if os.path.exists(temp_file): + os.remove(temp_file) + + if __name__ == "__main__": sys.exit(run()) From 44a73c7e6f188357a5fc15326cd7ca1f847e09ba Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 24 Jul 2025 16:54:46 +0200 Subject: [PATCH 68/74] Fix COG generation bug --- ost/app/preprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 043f8e0c..e9d953ea 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -179,10 +179,10 @@ def run( array = src.read() # Read array profile = src.profile # Get the metadata profile - tiff_path = pathlib.Path(str(non_cog_tiff_path[:-4]) + '_cog.tif') + tiff_path = pathlib.Path(str(non_cog_tiff_path)[:-4] + "_cog.tif") transparency_indexes = np.isnan(array) save_as_cog(array, profile, tiff_path, transparency_indexes, - dtype=profile['dtype']) + dtype=profile["dtype"]) LOGGER.info(f"COG file saved: {tiff_path}") # Write a STAC catalog and item pointing to the output product. 
From 768d88951c1e97cb95d17c79e5239e224fa01a67 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 24 Jul 2025 16:58:54 +0200 Subject: [PATCH 69/74] Fix COG generation bug --- ost/app/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index e9d953ea..1ce40da2 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -181,7 +181,7 @@ def run( tiff_path = pathlib.Path(str(non_cog_tiff_path)[:-4] + "_cog.tif") transparency_indexes = np.isnan(array) - save_as_cog(array, profile, tiff_path, transparency_indexes, + save_as_cog(array, profile, str(tiff_path), transparency_indexes, dtype=profile["dtype"]) LOGGER.info(f"COG file saved: {tiff_path}") From ce130b1bb93ba826df6a3dedbde60eeb45aab2e7 Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Thu, 24 Jul 2025 17:32:49 +0200 Subject: [PATCH 70/74] Delete original TIFF after creating COG --- ost/app/preprocessing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 1ce40da2..c4a1ec64 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -184,6 +184,8 @@ def run( save_as_cog(array, profile, str(tiff_path), transparency_indexes, dtype=profile["dtype"]) LOGGER.info(f"COG file saved: {tiff_path}") + non_cog_tiff_path.unlink() + LOGGER.info(f"Non-COG TIFF deleted: {non_cog_tiff_path}") # Write a STAC catalog and item pointing to the output product. LOGGER.info("Writing STAC catalogue and item") From 3ac87b8314584e76c41fe23e7af3e71372bc937d Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 25 Jul 2025 13:16:14 +0200 Subject: [PATCH 71/74] Update media_type in STAC output Add "profile=cloud-optimized", since we now output a valid COG. 
--- ost/app/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index c4a1ec64..8c0e724c 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -290,7 +290,7 @@ def write_stac_for_tiff( asset = pystac.Asset( roles=["data", "visual"], href=asset_path, - media_type="image/tiff; application=geotiff;", + media_type="image/tiff; application=geotiff; profile=cloud-optimized", title="OST-processed", extra_fields=dict(gsd=gsd), ) From cc654cd01c90f2fa17ba292e2545d3b320b36b4f Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 25 Jul 2025 16:54:58 +0200 Subject: [PATCH 72/74] Add raster bands data to STAC output --- ost/app/preprocessing.py | 108 +++++++++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 27 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index 8c0e724c..ce4a1e33 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -1,4 +1,5 @@ from datetime import datetime +import json import sys import os import pathlib @@ -6,6 +7,7 @@ import pprint import logging import shutil +import subprocess from ost import Sentinel1Scene import click @@ -181,8 +183,13 @@ def run( tiff_path = pathlib.Path(str(non_cog_tiff_path)[:-4] + "_cog.tif") transparency_indexes = np.isnan(array) - save_as_cog(array, profile, str(tiff_path), transparency_indexes, - dtype=profile["dtype"]) + save_as_cog( + array, + profile, + str(tiff_path), + transparency_indexes, + dtype=profile["dtype"], + ) LOGGER.info(f"COG file saved: {tiff_path}") non_cog_tiff_path.unlink() LOGGER.info(f"Non-COG TIFF deleted: {non_cog_tiff_path}") @@ -194,6 +201,7 @@ def run( LOGGER.info("Removing everything except output from CWD") delete_cwd_contents() + def copy_zip_input(input_path, output_dir, scene_id): year = scene_id[17:21] month = scene_id[21:23] @@ -292,7 +300,10 @@ def write_stac_for_tiff( href=asset_path, media_type="image/tiff; 
application=geotiff; profile=cloud-optimized", title="OST-processed", - extra_fields=dict(gsd=gsd), + extra_fields={ + "gsd": gsd, + "raster:bands": bands_data(asset_path, gsd), + }, ) bb = ds.bounds s = scene_id @@ -300,13 +311,15 @@ def write_stac_for_tiff( id=ITEM_ID, geometry={ "type": "Polygon", - "coordinates": [[ - [bb.left, bb.bottom], - [bb.left, bb.top], - [bb.right, bb.top], - [bb.right, bb.bottom], - [bb.left, bb.bottom], - ]], + "coordinates": [ + [ + [bb.left, bb.bottom], + [bb.left, bb.top], + [bb.right, bb.top], + [bb.right, bb.bottom], + [bb.left, bb.bottom], + ] + ], }, bbox=[bb.left, bb.bottom, bb.right, bb.top], # Datetime is required by the STAC specification and schema, even @@ -327,6 +340,9 @@ def write_stac_for_tiff( ), properties={}, # datetime values will be filled in automatically assets={"TIFF": asset}, + stac_extensions=[ + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + ], ) catalog = pystac.Catalog( id="catalog", @@ -355,8 +371,13 @@ def delete_cwd_contents(): member.unlink() -def save_as_cog(result_array: np.ndarray, profile, outfile_name, - transparency_indexes=None, dtype=rasterio.uint8): +def save_as_cog( + result_array: np.ndarray, + profile, + outfile_name, + transparency_indexes=None, + dtype=rasterio.uint8, +): """ Saves an array as a Cloud-Optimized GeoTIFF (COG) using rasterio. 
""" @@ -366,30 +387,63 @@ def save_as_cog(result_array: np.ndarray, profile, outfile_name, result_array[transparency_indexes] = 0 with rasterio.Env(): - profile.update(dtype=dtype, - count=result_array.shape[0], - compress="deflate", - tiled=True, - blockxsize=256, - blockysize=256, - driver='GTiff', - BIGTIFF='IF_NEEDED', - nodata=0) - - temp_file = outfile_name.replace('.tif', '_temp.tif') + profile.update( + dtype=dtype, + count=result_array.shape[0], + compress="deflate", + tiled=True, + blockxsize=256, + blockysize=256, + driver="GTiff", + BIGTIFF="IF_NEEDED", + nodata=0, + ) + + temp_file = outfile_name.replace(".tif", "_temp.tif") try: with rasterio.open(temp_file, "w", **profile) as dst: dst.write(result_array.astype(dtype)) # writes all bands dst.build_overviews(factors, Resampling.nearest) - dst.update_tags(ns='rio_overview', resampling='nearest') - - copy(temp_file, outfile_name, copy_src_overviews=True, driver='COG', - compress="deflate") + dst.update_tags(ns="rio_overview", resampling="nearest") + + copy( + temp_file, + outfile_name, + copy_src_overviews=True, + driver="COG", + compress="deflate", + ) finally: if os.path.exists(temp_file): os.remove(temp_file) +def bands_data(filename, resolution): + process = subprocess.run( + ["gdalinfo", "-json", "-stats", "-hist", filename], capture_output=True + ) + result = process.stdout + gdal_data = json.loads(result) + stac_data = [band_data(band, resolution) for band in gdal_data["bands"]] + return stac_data + + +def band_data(gdal_band, resolution): + return { + "histogram": gdal_band["histogram"], + "statistics": { + "mean": gdal_band["mean"], + "stddev": gdal_band["stdDev"], + "maximum": gdal_band["maximum"], + "minimum": gdal_band["minimum"], + "valid_percent": float( + gdal_band["metadata"][""]["STATISTICS_VALID_PERCENT"] + ), + }, + "spatial_resolution": resolution, + } + + if __name__ == "__main__": sys.exit(run()) From 1e6729a39b587d067a8ec09e160e5217af230a86 Mon Sep 17 00:00:00 2001 From: Pontus 
Lurcock Date: Fri, 25 Jul 2025 17:21:10 +0200 Subject: [PATCH 73/74] Add rendering metadata to STAC output --- ost/app/preprocessing.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py index ce4a1e33..f0b8c364 100644 --- a/ost/app/preprocessing.py +++ b/ost/app/preprocessing.py @@ -295,6 +295,7 @@ def write_stac_for_tiff( ) -> None: LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") ds = rasterio.open(asset_path) + bands = bands_data(asset_path, gsd) asset = pystac.Asset( roles=["data", "visual"], href=asset_path, @@ -302,7 +303,7 @@ def write_stac_for_tiff( title="OST-processed", extra_fields={ "gsd": gsd, - "raster:bands": bands_data(asset_path, gsd), + "raster:bands": bands, }, ) bb = ds.bounds @@ -338,10 +339,26 @@ def write_stac_for_tiff( (s[33:37], s[37:39], s[39:41], s[42:44], s[44:46], s[46:48]), ) ), - properties={}, # datetime values will be filled in automatically + properties={ + # datetime values will be filled in automatically + "title": "Open Sar Toolkit ARD Processing", + "renders": { + "render-tiff": { + "title": "ARD-processed", + "assets": ["TIFF"], + "nodata": "NaN", + "rescale": [ + [b["statistics"]["minimum"], b["statistics"]["maximum"]] for b in bands + ], + "resampling": "nearest", + "colormap_name": "ylorrd", + } + }, + }, assets={"TIFF": asset}, stac_extensions=[ "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/render/v2.0.0/schema.json", ], ) catalog = pystac.Catalog( From f4d43d2de73180c70d865b32815422aaf4981d3f Mon Sep 17 00:00:00 2001 From: Pontus Lurcock Date: Fri, 25 Jul 2025 18:08:40 +0200 Subject: [PATCH 74/74] Update Docker and CWL references to version 8 --- resources/Dockerfile | 2 +- resources/opensar.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/Dockerfile b/resources/Dockerfile index 52d23119..4a1a4cfa 100644 --- 
a/resources/Dockerfile +++ b/resources/Dockerfile @@ -77,7 +77,7 @@ RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions COPY constraints.txt $HOME -ARG ost_branch=version7 +ARG ost_branch=version8 # Invalidate Docker cache if there have been new commits to the repository ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit diff --git a/resources/opensar.cwl b/resources/opensar.cwl index c2405f1a..bc69c714 100644 --- a/resources/opensar.cwl +++ b/resources/opensar.cwl @@ -69,7 +69,7 @@ $graph: id: ost_script_1 requirements: DockerRequirement: - dockerPull: quay.io/bcdev/opensartoolkit:version7 + dockerPull: quay.io/bcdev/opensartoolkit:version8 NetworkAccess: networkAccess: true