diff --git a/.gitignore b/.gitignore index 10d04cff..f37d901f 100755 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,4 @@ dmypy.json # IDE .vscode +.idea diff --git a/Dockerfile b/Dockerfile index 16004cb1..50e94995 100755 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,13 @@ ENV TBX="esa-snap_sentinel_unix_${TBX_VERSION}_${TBX_SUBVERSION}.sh" \ HOME=/home/ost \ PATH=$PATH:/home/ost/programs/snap/bin:/home/ost/programs/OTB-${OTB_VERSION}-Linux64/bin +RUN apt-get update && apt-get install -yq wget libquadmath0 + +RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/libgfortran3_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i libgfortran3_6.4.0-17ubuntu1_amd64.deb + # install all dependencies RUN groupadd -r ost && \ useradd -r -g ost ost && \ @@ -29,7 +36,6 @@ RUN groupadd -r ost && \ libgdal-dev \ python3-gdal \ libspatialindex-dev \ - libgfortran3 \ wget \ unzip \ imagemagick \ @@ -46,7 +52,7 @@ RUN alias python=python3 && \ rm $TBX && \ rm snap.varfile && \ cd /home/ost/programs && \ - wget https://www.orfeo-toolbox.org/packages/${OTB} && \ + wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} && \ chmod +x $OTB && \ ./${OTB} && \ rm -f OTB-${OTB_VERSION}-Linux64.run @@ -60,11 +66,15 @@ RUN /home/ost/programs/snap/bin/snap --nosplash --nogui --modules --update-all 2 # set usable memory to 12G RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions +COPY requirements.txt $HOME + # get OST and tutorials -RUN python3 -m pip install git+https://github.com/ESA-PhiLab/OpenSarToolkit.git && \ - git clone https://github.com/ESA-PhiLab/OST_Notebooks && \ - jupyter labextension install @jupyter-widgets/jupyterlab-manager && \ - jupyter nbextension enable --py widgetsnbextension +RUN python3 -m pip install git+https://github.com/ESA-PhiLab/OpenSarToolkit.git -c requirements.txt && \ + git clone 
https://github.com/ESA-PhiLab/OST_Notebooks + +#RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager +#RUN jupyter nbextension enable --py widgetsnbextension +RUN pip install widgetsnbextension EXPOSE 8888 CMD jupyter lab --ip='0.0.0.0' --port=8888 --no-browser --allow-root diff --git a/examples/application-package/README b/examples/application-package/README new file mode 100644 index 00000000..e970ab2a --- /dev/null +++ b/examples/application-package/README @@ -0,0 +1,9 @@ +The contents of this directory support testing of the OpenSarToolkit +CWL Workflow as an OGC EO Application Package. The JSON files +(which in a real deployment would be generated by the EOAP platform's +data stage-in process) provide a STAC catalogue for a specified input +item. The file inputs.yaml specifies the parameters for the workflow. +After setting the parameters appropriately in inputs.yaml, the workflow +can be executed in the manner of an Application Package by running + +cwltool opensar.cwl inputs.yaml diff --git a/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json b/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json new file mode 100644 index 00000000..299f090f --- /dev/null +++ b/examples/application-package/SAR/GRD/2022/10/04/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json @@ -0,0 +1,24 @@ +{ + "stac_version": "1.1.0", + "type": "Feature", + "id": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + "geometry": null, + "properties": { + "datetime": "2022-10-04T16:43:16Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1" + }, + "assets": { + "GRD": { + "type": "application/zip", + "roles": [ "data" ], + "href": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.zip" + } + }, + "links": [ + { + "rel": "parent", + "href": "../catalog.json" + } + ] +} diff
--git a/examples/application-package/SAR/GRD/2022/10/04/catalog.json b/examples/application-package/SAR/GRD/2022/10/04/catalog.json new file mode 100644 index 00000000..bfae5594 --- /dev/null +++ b/examples/application-package/SAR/GRD/2022/10/04/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json" + } + ] +} diff --git a/examples/application-package/inputs.yaml b/examples/application-package/inputs.yaml new file mode 100644 index 00000000..cac17449 --- /dev/null +++ b/examples/application-package/inputs.yaml @@ -0,0 +1,11 @@ +--- +input: + class: Directory + path: /data/opensar/SAR/GRD/2022/10/04 +resolution: 100 +ard-type: Earth-Engine +with-speckle-filter: false +resampling-method: BILINEAR_INTERPOLATION +cdse-user: +cdse-password: +dry-run: true diff --git a/ost/app/__init__.py b/ost/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ost/app/preprocessing.py b/ost/app/preprocessing.py new file mode 100644 index 00000000..f0b8c364 --- /dev/null +++ b/ost/app/preprocessing.py @@ -0,0 +1,466 @@ +from datetime import datetime +import json +import sys +import os +import pathlib +from pathlib import Path +import pprint +import logging +import shutil +import subprocess + +from ost import Sentinel1Scene +import click +import numpy as np +import pystac +import rasterio +from rasterio.enums import Resampling +from rasterio.shutil import copy + + +LOGGER = logging.getLogger(__name__) +CATALOG_FILENAME = "catalog.json" +ITEM_ID = "result-item" + + +@click.command() +@click.argument("input_", metavar="input") +@click.option("--resolution", default=100) +@click.option( + "--ard-type", + type=click.Choice(["OST_GTC", "OST-RTC", "CEOS", "Earth-Engine"]), + default="Earth-Engine", +) +@click.option("--with-speckle-filter", 
is_flag=True, default=False) +@click.option( + "--resampling-method", + type=click.Choice(["BILINEAR_INTERPOLATION", "BICUBIC_INTERPOLATION"]), + default="BILINEAR_INTERPOLATION", +) +@click.option("--cdse-user", default="dummy") +@click.option("--cdse-password", default="dummy") +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Skip processing and write a placeholder output file instead. " + "Useful for testing.", +) +@click.option( + "--wipe-cwd", + is_flag=True, + default=False, + help="After processing, delete everything in the current working directory " + "except for the output data and STAC entries. Dangerous, but can be useful " + "when executing as an application package.", +) +def run( + input_: str, + resolution: int, + ard_type: str, + with_speckle_filter: bool, + resampling_method: str, + cdse_user: str, + cdse_password: str, + dry_run: bool, + wipe_cwd: bool, +): + horizontal_line = "-" * 79 # Used in log output + + logging.basicConfig(level=logging.INFO) + # from ost.helpers.settings import set_log_level + # import logging + # set_log_level(logging.DEBUG) + + scene_presets = { + # very first IW (VV/VH) S1 image available over Istanbul/Turkey + # NOTE:only available via ASF data mirror + "istanbul": "S1A_IW_GRDH_1SDV_20141003T040550_20141003T040619_002660_002F64_EC04", + # ??? 
+ "unknown": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + # IW scene (dual-polarised HH/HV) over Norway/Spitzbergen + "spitzbergen": "S1B_IW_GRDH_1SDH_20200325T150411_20200325T150436_020850_02789D_2B85", + # IW scene (single-polarised VV) over Ecuadorian Amazon + "ecuador": "S1A_IW_GRDH_1SSV_20150205T232009_20150205T232034_004494_00583A_1C80", + # EW scene (dual-polarised VV/VH) over Azores + # (needs a different DEM,see ARD parameters below) + "azores": "S1B_EW_GRDM_1SDV_20200303T193150_20200303T193250_020532_026E82_5CE9", + # EW scene (dual-polarised HH/HV) over Greenland + "greenland": "S1B_EW_GRDM_1SDH_20200511T205319_20200511T205419_021539_028E4E_697E", + # Stripmap mode S5 scene (dual-polarised VV/VH) over Germany + "germany": "S1B_S5_GRDH_1SDV_20170104T052519_20170104T052548_003694_006587_86AB", + } + + # "When executed, the Application working directory is also the Application + # output directory. Any file created by the Application should be added + # under that directory." -- https://docs.ogc.org/bp/20-089r1.html#toc20 + output_dir = os.getcwd() + output_path = Path(output_dir) + + # We expect input to be the path to a directory containing a STAC catalog + # containing an item which contains an asset for either a zip file + # (zipped SAFE archive) or a SAFE manifest (which is used to determine + # the location of a non-zipped SAFE directory). The returned path is + # either the zip file or the SAFE directory. + input_path = get_input_path_from_stac(input_) + + # We assume that any file input path is a zip, and any non-file input + # path is a SAFE directory. 
+ zip_input = pathlib.Path(input_path).is_file() + LOGGER.info(f"Input is {'zip' if zip_input else 'SAFE directory'}") + + scene_id = input_path[input_path.rfind("/") + 1 : input_path.rfind(".")] + if zip_input: + copy_zip_input(input_path, output_dir, scene_id) + + # Instantiate a Sentinel1Scene from the specified scene identifier + s1 = Sentinel1Scene(scene_id) + s1.info() # write scene summary information to stdout + if zip_input: + s1.download( + output_path, mirror="5", uname=cdse_user, pword=cdse_password + ) + + single_ard = s1.ard_parameters["single_ARD"] + # Set ARD type. Choices: "OST_GTC", "OST-RTC", "CEOS", "Earth Engine" + s1.update_ard_parameters(ard_type) + LOGGER.info( + f"{horizontal_line}\n" + f"Dictionary of Earth Engine ARD parameters:\n" + f"{horizontal_line}\n" + f"{pprint.pformat(single_ard)}\n" + f"{horizontal_line}" + ) + + # Customize ARD parameters + single_ard["resolution"] = resolution + single_ard["remove_speckle"] = with_speckle_filter + single_ard["dem"][ + "image_resampling" + ] = resampling_method # default: BICUBIC_INTERPOLATION + single_ard["to_tif"] = True + # single_ard['product_type'] = 'RTC-gamma0' + + # uncomment this for the Azores EW scene + # s1.ard_parameters['single_ARD']['dem']['dem_name'] = 'GETASSE30' + + LOGGER.info( + f"{horizontal_line}\n" + "Dictionary of customized ARD parameters for final scene processing:\n" + f"{horizontal_line}\n" + f"{pprint.pformat(single_ard)}\n" + f"{horizontal_line}" + ) + + tiff_dir = output_path / ITEM_ID + tiff_dir.mkdir(exist_ok=True) + non_cog_tiff_path = tiff_dir / f"{s1.start_date}.tif" + if dry_run: + LOGGER.info(f"Dry run -- creating dummy output at {non_cog_tiff_path}") + create_dummy_tiff(non_cog_tiff_path) + else: + LOGGER.info(f"Creating ARD at {output_path}") + # create_ard seems to be a prerequisite for create_rgb. 
+ if zip_input: + s1.create_ard( + infile=s1.get_path(output_path), + out_dir=output_path, + overwrite=True, + ) + else: + s1.create_ard( + infile=input_path, out_dir=output_path, overwrite=True + ) + + LOGGER.info(f"Path to newly created ARD product: {s1.ard_dimap}") + LOGGER.info(f"Creating RGB at {output_path}") + s1.create_rgb(outfile=non_cog_tiff_path) + LOGGER.info(f"Path to newly created RGB product: {non_cog_tiff_path}") + + with rasterio.open(non_cog_tiff_path) as src: + array = src.read() # Read array + profile = src.profile # Get the metadata profile + + tiff_path = pathlib.Path(str(non_cog_tiff_path)[:-4] + "_cog.tif") + transparency_indexes = np.isnan(array) + save_as_cog( + array, + profile, + str(tiff_path), + transparency_indexes, + dtype=profile["dtype"], + ) + LOGGER.info(f"COG file saved: {tiff_path}") + non_cog_tiff_path.unlink() + LOGGER.info(f"Non-COG TIFF deleted: {non_cog_tiff_path}") + + # Write a STAC catalog and item pointing to the output product. + LOGGER.info("Writing STAC catalogue and item") + write_stac_for_tiff(str(output_path), str(tiff_path), scene_id, resolution) + if wipe_cwd: + LOGGER.info("Removing everything except output from CWD") + delete_cwd_contents() + + +def copy_zip_input(input_path, output_dir, scene_id): + year = scene_id[17:21] + month = scene_id[21:23] + day = scene_id[23:25] + output_subdir = f"{output_dir}/SAR/GRD/{year}/{month}/{day}" + os.makedirs(output_subdir, exist_ok=True) + try: + scene_path = f"{output_subdir}/{scene_id}" + try: + os.link(input_path, f"{scene_path}.zip") + except OSError as e: + LOGGER.warning("Exception linking input data", exc_info=e) + LOGGER.warning("Attempting to copy instead.") + shutil.copy2(input_path, f"{scene_path}.zip") + with open(f"{scene_path}.downloaded", mode="w") as f: + f.write("successfully found here") + except Exception as e: + LOGGER.warning("Exception linking/copying input data", exc_info=e) + + +def create_dummy_tiff(path: Path) -> None: + import numpy as np + 
import rasterio + + data = np.fromfunction( + lambda x, y: x / 2000 + np.sin(y / 50), (2000, 2000) + ) + with rasterio.open( + str(path), + "w", + driver="GTiff", + height=data.shape[0], + width=data.shape[1], + count=1, + dtype=data.dtype, + crs="+proj=latlong", + transform=rasterio.transform.Affine.scale(0.1, 0.1), + tiled=True, + ) as dst: + dst.write(data, 1) + + +def get_input_path_from_stac(stac_root: str) -> str: + stac_path = pathlib.Path(stac_root) + catalog = pystac.Catalog.from_file(str(stac_path / "catalog.json")) + item_links = [link for link in catalog.links if link.rel == "item"] + assert len(item_links) == 1 + item_link = item_links[0] + item_path = stac_path / item_link.href + item = pystac.Item.from_file(str(item_path)) + if "manifest" in item.assets: + LOGGER.info(f"Found manifest asset in {str(item_path)}") + manifest_asset = item.assets["manifest"] + if "filename" in manifest_asset.extra_fields: + filename = pathlib.Path(manifest_asset.extra_fields["filename"]) + LOGGER.info(f"Asset path in item: {str(filename)}") + # The SAFE directory is the direct parent of the manifest file, + # and we resolve it relative to the parent directory of the STAC + # item. + safe_dir = item_path.parent / filename.parent + LOGGER.info(f"Resolved SAFE directory path to {safe_dir}") + assert safe_dir.exists(), "SAFE directory does not exist" + assert safe_dir.is_dir(), "SAFE directory is not a directory" + return str(safe_dir) + else: + raise RuntimeError(f"No filename for manifest asset in {catalog}") + else: + LOGGER.info("No manifest asset found; looking for zip asset") + zip_assets = [ + asset + for asset in item.assets.values() + if asset.media_type == "application/zip" + ] + if len(zip_assets) < 1: + raise RuntimeError( + f"No manifest assets or zip assets found in {catalog}" + ) + elif len(zip_assets) > 1: + raise RuntimeError( + f"No manifest assets and multiple zip assets found in " + f"{stac_root}, so it's not clear which zip asset to use." 
+ ) + else: + zip_path = stac_path / zip_assets[0].href + LOGGER.info(f"Found input zip at {zip_path}") + return str(zip_path) + + +def write_stac_for_tiff( + stac_root: str, asset_path: str, scene_id: str, gsd: int +) -> None: + LOGGER.info(f"Writing STAC for asset {asset_path} to {stac_root}") + ds = rasterio.open(asset_path) + bands = bands_data(asset_path, gsd) + asset = pystac.Asset( + roles=["data", "visual"], + href=asset_path, + media_type="image/tiff; application=geotiff; profile=cloud-optimized", + title="OST-processed", + extra_fields={ + "gsd": gsd, + "raster:bands": bands, + }, + ) + bb = ds.bounds + s = scene_id + item = pystac.Item( + id=ITEM_ID, + geometry={ + "type": "Polygon", + "coordinates": [ + [ + [bb.left, bb.bottom], + [bb.left, bb.top], + [bb.right, bb.top], + [bb.right, bb.bottom], + [bb.left, bb.bottom], + ] + ], + }, + bbox=[bb.left, bb.bottom, bb.right, bb.top], + # Datetime is required by the STAC specification and schema, even + # when there is no reasonable value for it to take. In such cases + # it is permitted to set datetime to null, but not to omit it. 
+ datetime=None, + start_datetime=datetime( + *map( + int, + (s[17:21], s[21:23], s[23:25], s[26:28], s[28:30], s[30:32]), + ) + ), + end_datetime=datetime( + *map( + int, + (s[33:37], s[37:39], s[39:41], s[42:44], s[44:46], s[46:48]), + ) + ), + properties={ + # datetime values will be filled in automatically + "title": "Open Sar Toolkit ARD Processing", + "renders": { + "render-tiff": { + "title": "ARD-processed", + "assets": ["TIFF"], + "nodata": "NaN", + "rescale": [ + [b["statistics"]["minimum"], b["statistics"]["maximum"]] for b in bands + ], + "resampling": "nearest", + "colormap_name": "ylorrd", + } + }, + }, + assets={"TIFF": asset}, + stac_extensions=[ + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/render/v2.0.0/schema.json", + ], + ) + catalog = pystac.Catalog( + id="catalog", + description="Root catalog", + href=f"{stac_root}/{CATALOG_FILENAME}", + ) + catalog.add_item(item) + catalog.make_all_asset_hrefs_relative() + catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED) + + # We validate after saving, so if validation fails the invalid STAC output + # is available for debugging. + n_validated = catalog.validate_all() + LOGGER.info(f"{n_validated} STAC item(s) successfully validated.") + + +def delete_cwd_contents(): + """Delete everything except the output data and STAC files""" + + cwd = Path.cwd() + for member in cwd.iterdir(): + if member.name not in {CATALOG_FILENAME, ITEM_ID}: + if member.is_dir(): + shutil.rmtree(member) + if member.is_file(): + member.unlink() + + +def save_as_cog( + result_array: np.ndarray, + profile, + outfile_name, + transparency_indexes=None, + dtype=rasterio.uint8, +): + """ + Saves an array as a Cloud-Optimized GeoTIFF (COG) using rasterio. 
+ """ + factors = [2, 4, 8, 16, 32, 64] + + if transparency_indexes is not None: + result_array[transparency_indexes] = 0 + + with rasterio.Env(): + profile.update( + dtype=dtype, + count=result_array.shape[0], + compress="deflate", + tiled=True, + blockxsize=256, + blockysize=256, + driver="GTiff", + BIGTIFF="IF_NEEDED", + nodata=0, + ) + + temp_file = outfile_name.replace(".tif", "_temp.tif") + + try: + with rasterio.open(temp_file, "w", **profile) as dst: + dst.write(result_array.astype(dtype)) # writes all bands + dst.build_overviews(factors, Resampling.nearest) + dst.update_tags(ns="rio_overview", resampling="nearest") + + copy( + temp_file, + outfile_name, + copy_src_overviews=True, + driver="COG", + compress="deflate", + ) + finally: + if os.path.exists(temp_file): + os.remove(temp_file) + + +def bands_data(filename, resolution): + process = subprocess.run( + ["gdalinfo", "-json", "-stats", "-hist", filename], capture_output=True + ) + result = process.stdout + gdal_data = json.loads(result) + stac_data = [band_data(band, resolution) for band in gdal_data["bands"]] + return stac_data + + +def band_data(gdal_band, resolution): + return { + "histogram": gdal_band["histogram"], + "statistics": { + "mean": gdal_band["mean"], + "stddev": gdal_band["stdDev"], + "maximum": gdal_band["maximum"], + "minimum": gdal_band["minimum"], + "valid_percent": float( + gdal_band["metadata"][""]["STATISTICS_VALID_PERCENT"] + ), + }, + "spatial_resolution": resolution, + } + + +if __name__ == "__main__": + sys.exit(run()) diff --git a/ost/generic/common_wrappers.py b/ost/generic/common_wrappers.py index da3e28fe..9cad7fbf 100644 --- a/ost/generic/common_wrappers.py +++ b/ost/generic/common_wrappers.py @@ -7,6 +7,7 @@ from ost.helpers import helpers as h from ost.helpers.settings import GPT_FILE, OST_ROOT from ost.helpers.errors import GPTRuntimeError, NotValidFileError +from multiprocessing import cpu_count logger = logging.getLogger(__name__) @@ -416,3 +417,48 @@ def 
mt_speckle_filter(in_stack, out_stack, logfile, config_dict): return str(out_stack.with_suffix(".dim")) else: raise NotValidFileError(f"Product did not pass file check: {return_code}") + + +@retry(stop_max_attempt_number=3, wait_fixed=1) +def convert_to_tiff( + input, + output, + logfile, +): + """ + Converts product into TIFF format + :param input: path to a product in BEAM-DIMAP format + :param output: path to the target TIFF file + :param logfile: SNAP logfile + :param config_dict: + :return: + """ + + # get relevant config parameters + cpus = cpu_count() # config_dict["snap_cpu_parallelism"] + + logger.debug("Converting to GeoTIFF") + + command = ( + f"{GPT_FILE} Subset " + f"-t {output} " + f"-f GeoTIFF " + f"-x -q {cpus} " + f"{input}" + ) + return_code = h.run_command(command, logfile) + + if return_code == 0: + logger.debug(f"Successfully created TIFF file {output}") + else: + raise GPTRuntimeError( + f"TIFF conversion of {input} exited with error {return_code}. " + f"See {logfile} for Snap's error message." 
+ ) + + # do check routine + return_msg = h.check_out_tiff(output) + if return_msg == 0: + logger.debug("Product passed validity check.") + else: + raise NotValidFileError(f"Product did not pass file check: {return_msg}") diff --git a/ost/graphs/S1_GRD2ARD/3_LSmap.xml b/ost/graphs/S1_GRD2ARD/3_LSmap.xml index 1ec47f53..284a7e4d 100755 --- a/ost/graphs/S1_GRD2ARD/3_LSmap.xml +++ b/ost/graphs/S1_GRD2ARD/3_LSmap.xml @@ -48,7 +48,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_GRD2ARD/3_ML_TC.xml b/ost/graphs/S1_GRD2ARD/3_ML_TC.xml index 39c21394..2ca9d487 100755 --- a/ost/graphs/S1_GRD2ARD/3_ML_TC.xml +++ b/ost/graphs/S1_GRD2ARD/3_ML_TC.xml @@ -39,7 +39,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml b/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml index 44bc70d8..e3f29736 100755 --- a/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml +++ b/ost/graphs/S1_GRD2ARD/3_ML_TC_deg.xml @@ -39,7 +39,6 @@ false false true - false false false false diff --git a/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml b/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml index cd711ddc..a0f7f97a 100755 --- a/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml +++ b/ost/graphs/S1_SLC2ARD/S1_SLC_ML_TC.xml @@ -52,7 +52,6 @@ false false true - false false false false diff --git a/ost/helpers/copernicus.py b/ost/helpers/copernicus.py index fb729354..629eae8d 100644 --- a/ost/helpers/copernicus.py +++ b/ost/helpers/copernicus.py @@ -2,11 +2,14 @@ import getpass import logging +import multiprocessing +import urllib from pathlib import Path from datetime import datetime as dt - +import tqdm import requests from shapely.wkt import loads +from ost.helpers import helpers as h logger = logging.getLogger(__name__) @@ -21,6 +24,32 @@ def ask_credentials(): return uname, pword +def connect(uname=None, pword=None, base_url="https://catalogue.dataspace.copernicus.eu"): + """Generates an opener for the Copernicus apihub/dhus + + :param uname: username of Copernicus' CDSE + :type 
uname: str + :param pword: password of Copernicus' CDSE + :type pword: str + :param base_url: + :return: an urllib opener instance for Copernicus' CDSE + :rtype: opener object + """ + + if not uname: + print(" If you do not have a CDSE user" " account go to: https://browser.dataspace.copernicus.eu") + uname = input(" Your CDSE Username:") + + if not pword: + pword = getpass.getpass(" Your CDSE Password:") + + # create opener + manager = urllib.request.HTTPPasswordMgrWithDefaultRealm() + manager.add_password(None, base_url, uname, pword) + handler = urllib.request.HTTPBasicAuthHandler(manager) + opener = urllib.request.build_opener(handler) + + return opener def get_access_token(username, password: None): @@ -176,3 +205,174 @@ def get_advanced_metadata(metafile, access_token): beginposition, endposition, acqdate, 0 # placeholder for size ) + + +def s1_download(uuid, filename, uname, pword, base_url="https://catalogue.dataspace.copernicus.eu"): + """Single scene download function for CDSE + + :param uuid: product's uuid + :param filename: local path for the download + :param uname: username of CDSE + :param pword: password of CDSE + :param base_url: + + :return: + """ + + # get out the arguments + if isinstance(filename, str): + filename = Path(filename) + + # check if file is partially downloaded + first_byte = filename.stat().st_size if filename.exists() else 0 + + # ask for credentials in case they are not defined as input + if not uname or not pword: + ask_credentials() + + # define url + url = f"{base_url}/odata/v1/Products({uuid})/$value" + + # get first response for file Size + access_token = get_access_token(uname, pword) + # we use some random url for checking (also for czech mirror) + with requests.Session() as session: + headers = {'Authorization': f'Bearer {access_token}', + "Range": f"bytes={first_byte}-"} + request = session.request("get", url) + response = session.get(request.url, headers=headers, stream=True) + + # check response + if 
response.status_code == 401: + raise ValueError(" ERROR: Username/Password are incorrect.") + elif response.status_code != 200: + print(" ERROR: Something went wrong, will try again in 30 seconds.") + response.raise_for_status() + + # get download size + remaining_length = int(response.headers.get("content-length", 0)) + print(f"{filename.name} {first_byte=} {remaining_length=}") + if remaining_length == 0: + return + + # define chunk_size + chunk_size = 8192 + + # actual download + with open(filename, "ab") as file: + for chunk in response.iter_content(chunk_size): + if chunk: + file.write(chunk) + #pbar.update(len(chunk)) + #print(f"reading {filename.name} {len(chunk)}") + else: + print(f"reading {filename.name} empty chunk") + print(f"{filename.name} downloaded, {filename.stat().st_size=}") + + logger.info(f"Checking zip archive {filename.name} for consistency") + zip_test = h.check_zipfile(filename) + + # if it did not pass the test, remove the file + # in the while loop it will be downloaded again + if zip_test is not None: + logger.info(f"{filename.name} did not pass the zip test. 
Re-downloading " f"the full scene.") + #filename.unlink() + #first_byte = 0 + raise ValueError(f"zip test failed for {filename.name}") + # otherwise we change the status to downloaded + logger.info(f"{filename.name} passed the zip test.") + with open(filename.with_suffix(".downloaded"), "w") as file: + file.write("successfully downloaded \n") + + +def s1_download_parallel(argument_list): + """Helper function for parallel download from scihub""" + + uuid, filename, uname, pword, base_url = argument_list + s1_download(uuid, filename, uname, pword, base_url) + + +def batch_download( + inventory_df, + download_dir, + uname, + pword, + concurrent=2, + base_url="https://catalogue.dataspace.copernicus.eu", +): + """Batch download Sentinel-1 on the basis of an OST inventory GeoDataFrame + + :param inventory_df: + :param download_dir: + :param uname: + :param pword: + :param concurrent: + :param base_url: + + :return: + """ + from ost import Sentinel1Scene as S1Scene + + if isinstance(download_dir, str): + download_dir = Path(download_dir) + + # create list of scenes + scenes = inventory_df["identifier"].tolist() + + check, i = False, 1 + while check is False and i <= 10: + + download_list = [] + for scene_id in scenes: + scene = S1Scene(scene_id) + filepath = scene.download_path(download_dir, True) + logger.info(f"checking path {filepath}") + if (filepath.parent / (filepath.stem + ".downloaded")).exists(): + logger.debug(f"{scene.scene_id} is already downloaded.") + else: + try: + uuid = inventory_df["uuid"][inventory_df["identifier"] == scene_id].tolist() + except KeyError: + #uuid = [scene.scihub_uuid(connect(uname=uname, pword=pword, base_url=base_url))] + print("cannot find uuid in inventory " + str(inventory_df)) + raise + # create list objects for download + download_list.append([uuid[0], filepath, uname, pword, base_url]) + + if download_list: + pool = multiprocessing.Pool(processes=concurrent) + pool.map(s1_download_parallel, download_list) + + downloaded_scenes = 
list(download_dir.glob("**/*.downloaded")) + + if len(inventory_df["identifier"].tolist()) == len(downloaded_scenes): + logger.info("All products are downloaded.") + check = True + else: + check = False + for scene in scenes: + + scene = S1Scene(scene) + file_path = scene.download_path(download_dir) + + if file_path.with_suffix(".downloaded").exists(): + scenes.remove(scene.scene_id) + + i += 1 + + +def check_connection(uname, pword, base_url="https://catalogue.dataspace.copernicus.eu"): + """Check if a connection with CDSE can be established + :param uname: + :param pword: + :param base_url: + :return: + """ + access_token = get_access_token(uname, pword) + # we use some random url for checking (also for czech mirror) + url = f"{base_url}/odata/v1/Products(8f30a536-c01c-4ef4-ac74-be3378dc44c4)/$value" + with requests.Session() as session: + headers = {'Authorization': f'Bearer {access_token}'} + request = session.request("head", url) + response = session.get(request.url, headers=headers, stream=True) + return response.status_code diff --git a/ost/helpers/helpers.py b/ost/helpers/helpers.py index cca5618c..e51e0832 100644 --- a/ost/helpers/helpers.py +++ b/ost/helpers/helpers.py @@ -16,6 +16,7 @@ from pathlib import Path from datetime import timedelta from osgeo import gdal +from ost.generic.common_wrappers import convert_to_tiff logger = logging.getLogger(__name__) @@ -102,7 +103,27 @@ def move_dimap(infile_prefix, outfile_prefix, to_tif): if to_tif: - gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) + #gdal.Warp(outfile_prefix.with_suffix(".tif"), infile_prefix.with_suffix(".dim")) + convert_to_tiff( + infile_prefix.with_suffix(".dim"), + infile_prefix.with_suffix(".uncompressed.tif"), + outfile_prefix.with_suffix(".log"), + ) + # TODO use COG output format instead of writing GTiff and update afterwards + gdal.Translate( + str(outfile_prefix.with_suffix(".tif")), + str(infile_prefix.with_suffix(".uncompressed.tif")), + 
creationOptions={ + "TILED": "YES", + "BLOCKXSIZE": "512", + "BLOCKYSIZE": "512", + "COMPRESS": "DEFLATE", + } + ) + image = gdal.Open(str(outfile_prefix.with_suffix(".tif")), 1) # 0 = read-only, 1 = read-write. + gdal.SetConfigOption('COMPRESS_OVERVIEW', 'DEFLATE') + image.BuildOverviews('NEAREST', [4, 8, 16, 32, 64, 128], gdal.TermProgress_nocb) + del image else: @@ -180,7 +201,7 @@ def check_out_tiff(file, test_stats=True): if test_stats: # open the file ds = gdal.Open(str(file)) - stats = ds.GetRasterBand(1).GetStatistics(0, 1) + stats = ds.GetRasterBand(1).ComputeStatistics(False) # if difference of min and max is 0 and mean are all 0 if stats[1] - stats[0] == 0 and stats[2] == 0: diff --git a/ost/s1/download.py b/ost/s1/download.py index 9277f434..6d248142 100644 --- a/ost/s1/download.py +++ b/ost/s1/download.py @@ -15,7 +15,7 @@ from ost.s1.s1scene import Sentinel1Scene as S1Scene from ost.helpers import helpers as h -from ost.helpers import scihub, peps, asf, onda # , asf_wget +from ost.helpers import scihub, peps, asf, onda, copernicus # , asf_wget logger = logging.getLogger(__name__) @@ -90,9 +90,10 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un print(" (2) Alaska Satellite Facility (NASA, full archive)") print(" (3) PEPS (CNES, 1 year rolling archive)") print(" (4) ONDA DIAS (ONDA DIAS full archive for SLC -" " or GRD from 30 June 2019)") + print(" (5) CDSE") # print(' (5) Alaska Satellite Facility (using WGET - ' # 'unstable - use only if 2 does not work)') - mirror = input(" Type 1, 2, 3, or 4: ") + mirror = input(" Type 1, 2, 3, 4, or 5: ") if not uname: print(" Please provide username for the selected server") @@ -118,7 +119,10 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un error_code = peps.check_connection(uname, pword) elif int(mirror) == 4: error_code = onda.check_connection(uname, pword) - # elif int(mirror) == 5: + elif int(mirror) == 5: + # we avoid checking the 
connection, all products may be downloaded already + #error_code = copernicus.check_connection(uname, pword) + error_code = 200 # error_code = asf_wget.check_connection(uname, pword) # hidden option for downloading from czech mirror elif int(mirror) == 321: @@ -128,7 +132,7 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un if error_code == 401: raise ValueError("Username/Password are incorrect") - elif error_code != 200: + elif error_code != 200 and error_code != 301 and error_code != 404: raise ValueError(f"Some connection error. Error code {error_code}.") # download in parallel @@ -140,6 +144,8 @@ def download_sentinel1(inventory_df, download_dir, mirror=None, concurrent=2, un peps.batch_download(inventory_df, download_dir, uname, pword, concurrent) elif int(mirror) == 4: # ONDA DIAS onda.batch_download(inventory_df, download_dir, uname, pword, concurrent) + elif int(mirror) == 5: # CDSE + copernicus.batch_download(inventory_df, download_dir, uname, pword, concurrent) if int(mirror) == 321: # scihub czech mirror scihub.batch_download( inventory_df, diff --git a/ost/s1/grd_to_ard.py b/ost/s1/grd_to_ard.py index 8dfe5d76..241a0c96 100644 --- a/ost/s1/grd_to_ard.py +++ b/ost/s1/grd_to_ard.py @@ -465,7 +465,7 @@ def ard_to_rgb(infile, outfile, driver="GTiff", to_db=True, shrink_factor=1): ratio_array = ras.scale_to_int(ratio_array, 1, 15, "uint8") meta.update(dtype="uint8") - with rasterio.open(outfile, "w", **meta) as dst: + with rasterio.open(outfile, "w", tiled=True, **meta) as dst: # write file for k, arr in [(1, co_array), (2, cr_array), (3, ratio_array)]: @@ -504,5 +504,5 @@ def ard_to_rgb(infile, outfile, driver="GTiff", to_db=True, shrink_factor=1): co_array = ras.scale_to_int(co_array, -20, 0, "uint8") meta.update(dtype="uint8") - with rasterio.open(outfile, "w", **meta) as dst: + with rasterio.open(outfile, "w", tiled=True, **meta) as dst: dst.write(co_array) diff --git a/ost/s1/s1scene.py b/ost/s1/s1scene.py index 
dd63227f..c9717834 100644 --- a/ost/s1/s1scene.py +++ b/ost/s1/s1scene.py @@ -37,7 +37,7 @@ import pandas as pd import geopandas as gpd -from ost.helpers import scihub, peps, onda, asf, raster as ras, helpers as h +from ost.helpers import scihub, peps, onda, asf, raster as ras, helpers as h, copernicus from ost.helpers.settings import APIHUB_BASEURL, OST_ROOT from ost.helpers.settings import set_log_level, check_ard_parameters from ost.s1.grd_to_ard import grd_to_ard, ard_to_rgb @@ -244,7 +244,7 @@ def info_dict(self): return inf_dict - def download(self, download_dir, mirror=None): + def download(self, download_dir, mirror=None, uname=None, pword=None): if not mirror: logger.info("One or more of your scenes need to be downloaded.") @@ -253,9 +253,10 @@ def download(self, download_dir, mirror=None): print(" (2) Alaska Satellite Facility (NASA, full archive)") print(" (3) PEPS (CNES, 1 year rolling archive)") print(" (4) ONDA DIAS (ONDA DIAS full archive for" " SLC - or GRD from 30 June 2019)") + print(" (5) CDSE") # print(' (5) Alaska Satellite Facility (using WGET' # ' - unstable - use only if 2 fails)') - mirror = input(" Type 1, 2, 3, or 4: ") + mirror = input(" Type 1, 2, 3, 4, or 5: ") from ost.s1 import download @@ -263,16 +264,19 @@ def download(self, download_dir, mirror=None): download_dir = Path(download_dir) if mirror == "1": - uname, pword = scihub.ask_credentials() + if uname is None or pword is None: + uname, pword = scihub.ask_credentials() opener = scihub.connect(uname=uname, pword=pword) df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.scihub_uuid(opener)]}) elif mirror == "2": - uname, pword = asf.ask_credentials() + if uname is None or pword is None: + uname, pword = asf.ask_credentials() df = pd.DataFrame({"identifier": [self.scene_id]}) elif mirror == "3": - uname, pword = peps.ask_credentials() + if uname is None or pword is None: + uname, pword = peps.ask_credentials() df = pd.DataFrame( { "identifier": [self.scene_id], @@ 
-280,9 +284,16 @@ def download(self, download_dir, mirror=None): } ) elif mirror == "4": - uname, pword = onda.ask_credentials() + if uname is None or pword is None: + uname, pword = onda.ask_credentials() opener = onda.connect(uname=uname, pword=pword) df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.ondadias_uuid(opener)]}) + elif mirror == "5": + if uname is None or pword is None: + uname, pword = copernicus.ask_credentials() + opener = copernicus.connect(uname=uname, pword=pword) + df = pd.DataFrame({"identifier": [self.scene_id], "uuid": [self.copernicus_uuid(opener)]}) + else: raise ValueError("You entered the wrong mirror.") # else: # ASF @@ -645,6 +656,39 @@ def safe_annotation_get(self, download_dir, data_mount=None): return gdf_final.drop_duplicates(["AnxTime"], keep="first") + def copernicus_uuid(self, opener): + logger.info("Getting Copernicus UUID") + + # construct the basic the url + base_url = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=" + + # request + action = urllib.parse.quote(f"Name eq '{self.scene_id}.SAFE'") + + # construct the download url + url = base_url + action + + logger.info("Fetching " + url) + + try: + # get the request + req = opener.open(url) + except URLError as error: + if hasattr(error, "reason"): + logger.info(f"{CONNECTION_ERROR}{error.reason}") + sys.exit() + elif hasattr(error, "code"): + logger.info(f"{CONNECTION_ERROR_2}{error.reason}") + sys.exit() + else: + # write the request to to the response variable + # (i.e. 
the xml coming back from scihub) + response = req.read().decode("utf-8") + + # return uuid from response + # "Id":"1b64f9bb-2e8e-58ec-abac-45f4f5b61d22","Name":"S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.SAFE" + return response.split('"Id":"')[1].split('","Name":')[0] + # onda dias uuid extractor def ondadias_uuid(self, opener): diff --git a/pyproject.toml b/pyproject.toml index 95d059ee..a305cc36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "numpy", "pandas", "psycopg2-binary", + "pystac", "rasterio", "requests", "scipy", diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 50362e8c..ff2bb2d9 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,24 @@ -descartes -fiona -gdal>=2 -godale -pyproj>=2.1 -geopandas>=0.8 -jupyterlab -matplotlib -numpy -pandas -psycopg2-binary -rasterio -requests -scipy -shapely -tqdm -imageio -rtree -retrying -pytest +click>=8.0 +descartes==1.1.0 +fiona==1.9.6 +gdal==3.0.4 +godale==0.3 +pyproj==3.5.0 +geopandas==0.13.2 +jupyterlab==4.2.3 +matplotlib==3.7.5 +numpy==1.24.4 +pandas==1.5.3 +psycopg2-binary==2.9.9 +pystac +rasterio==1.3.10 +requests==2.32.3 +scipy==1.10.1 +shapely==2.0.5 +tqdm==4.66.4 +imageio==2.34.2 +rtree==1.3.0 +retrying==1.3.3 +pytest==8.2.2 pytest-cov -pytest-runner \ No newline at end of file +pytest-runner diff --git a/resources/Dockerfile b/resources/Dockerfile new file mode 100644 index 00000000..4a1a4cfa --- /dev/null +++ b/resources/Dockerfile @@ -0,0 +1,93 @@ +FROM ubuntu:20.04 + +LABEL maintainer="Andreas Vollrath, FAO" +LABEL OpenSARToolkit='0.12.3' + +ENV HOME=/home/ost + +# set work directory to home and download snap +WORKDIR /home/ost + +# copy the snap installation config file into the container +COPY snap.varfile $HOME + +# update variables +ENV OTB_VERSION="7.3.0" \ + TBX_VERSION="8" \ + TBX_SUBVERSION="0" +ENV TBX="esa-snap_sentinel_unix_${TBX_VERSION}_${TBX_SUBVERSION}.sh" \ + 
SNAP_URL="http://step.esa.int/downloads/${TBX_VERSION}.${TBX_SUBVERSION}/installers" \ + OTB=OTB-${OTB_VERSION}-Linux64.run \ + HOME=/home/ost \ + PATH=$PATH:/home/ost/programs/snap/bin:/home/ost/programs/OTB-${OTB_VERSION}-Linux64/bin + +RUN apt-get update && apt-get install -yq wget libquadmath0 sudo + +RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i gcc-6-base_6.4.0-17ubuntu1_amd64.deb && \ + wget http://archive.ubuntu.com/ubuntu/pool/universe/g/gcc-6/libgfortran3_6.4.0-17ubuntu1_amd64.deb && \ + dpkg -i libgfortran3_6.4.0-17ubuntu1_amd64.deb + +# install all dependencies +RUN groupadd -r ost && \ + useradd -r -g ost ost && \ + apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq \ + python3 \ + python3-pip \ + git \ + libgdal-dev \ + python3-gdal \ + libspatialindex-dev \ + wget \ + unzip \ + imagemagick \ + nodejs \ + npm + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq jq + +# Install OTB. Use some custom headers when fetching package, since otherwise +# the download speed is heavily throttled. 
+RUN alias python=python3 && \ + rm -rf /var/lib/apt/lists/* && \ + python3 -m pip install jupyterlab && \ + mkdir /home/ost/programs && \ + wget $SNAP_URL/$TBX && \ + chmod +x $TBX && \ + ./$TBX -q -varfile snap.varfile && \ + rm $TBX && \ + rm snap.varfile && \ + cd /home/ost/programs && \ + wget https://www.orfeo-toolbox.org/packages/archives/OTB/${OTB} \ + --progress=dot:giga \ + --referer="https://www.orfeo-toolbox.org/packages/archives/OTB/" \ + --user-agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0" && \ + chmod +x $OTB && \ + ./${OTB} && \ + rm -f OTB-${OTB_VERSION}-Linux64.run + +# update snap to latest version +RUN /home/ost/programs/snap/bin/snap --nosplash --nogui --modules --update-all 2>&1 | while read -r line; do \ + echo "$line" && \ + [ "$line" = "updates=0" ] && sleep 2 && pkill -TERM -f "snap/jre/bin/java"; \ + done; exit 0 + +# set usable memory to 12G +RUN echo "-Xmx12G" > /home/ost/programs/snap/bin/gpt.vmoptions + +COPY constraints.txt $HOME + +ARG ost_branch=version8 + +# Invalidate Docker cache if there have been new commits to the repository +ADD "https://api.github.com/repos/bcdev/OpenSarToolkit/commits?sha=${ost_branch}&per_page=1" last_commit + +# Install OST and tutorials +RUN python3 -m pip install git+https://github.com/bcdev/OpenSarToolkit.git@${ost_branch} -c constraints.txt + +#RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager +#RUN jupyter nbextension enable --py widgetsnbextension +#RUN pip install widgetsnbextension + +#EXPOSE 8888 +#CMD jupyter lab --ip='0.0.0.0' --port=8888 --no-browser --allow-root diff --git a/resources/constraints.txt b/resources/constraints.txt new file mode 100644 index 00000000..cfe3d577 --- /dev/null +++ b/resources/constraints.txt @@ -0,0 +1,23 @@ +click>=8.0 +descartes==1.1.0 +fiona==1.9.6 +gdal==3.0.4 +godale==0.3 +pyproj==3.5.0 +geopandas==0.13.2 +#jupyterlab==4.2.3 +matplotlib==3.7.5 +numpy==1.24.4 +pandas==1.5.3 +psycopg2-binary==2.9.9 
+rasterio==1.3.10 +requests==2.32.3 +scipy==1.10.1 +shapely==2.0.5 +tqdm==4.66.4 +imageio==2.34.2 +rtree==1.3.0 +retrying==1.3.3 +pytest==8.2.2 +pytest-cov +pytest-runner diff --git a/resources/opensar.cwl b/resources/opensar.cwl new file mode 100644 index 00000000..bc69c714 --- /dev/null +++ b/resources/opensar.cwl @@ -0,0 +1,129 @@ +cwlVersion: v1.2 +$namespaces: + s: https://schema.org/ +s:softwareVersion: 1.0.0 +schemas: + - http://schema.org/version/9.0/schemaorg-current-http.rdf +$graph: + - class: Workflow + label: OST Notebook 1 + doc: Preprocessing an S1 image with OST + id: opensartoolkit + requirements: [] + inputs: + input: + type: Directory + label: Input S1 GRD + loadListing: no_listing + resolution: + type: int + label: Resolution + doc: Resolution in metres + ard-type: + type: + type: enum + symbols: + - OST_GTC + - OST-RTC + - CEOS + - Earth-Engine + label: ARD type + doc: Type of analysis-ready data to produce + with-speckle-filter: + type: boolean + label: Speckle filter + doc: Whether to apply a speckle filter + resampling-method: + type: + type: enum + symbols: + - BILINEAR_INTERPOLATION + - BICUBIC_INTERPOLATION + label: Resampling method + doc: Resampling method to use + dry-run: + type: boolean + label: Dry run + doc: Skip processing and write a placeholder output file instead + + outputs: + - id: stac_catalog + outputSource: + - run_script/ost_ard + type: Directory + + steps: + run_script: + run: "#ost_script_1" + in: + input: input + resolution: resolution + ard-type: ard-type + with-speckle-filter: with-speckle-filter + resampling-method: resampling-method + dry-run: dry-run + out: + - ost_ard + + - class: CommandLineTool + id: ost_script_1 + requirements: + DockerRequirement: + dockerPull: quay.io/bcdev/opensartoolkit:version8 + NetworkAccess: + networkAccess: true + + baseCommand: + - python3 + - /usr/local/lib/python3.8/dist-packages/ost/app/preprocessing.py + arguments: + - --wipe-cwd + inputs: + input: + type: Directory + 
inputBinding: + position: 1 + resolution: + type: int + inputBinding: + prefix: --resolution + ard-type: + type: + type: enum + symbols: + - OST_GTC + - OST-RTC + - CEOS + - Earth-Engine + inputBinding: + prefix: --ard-type + with-speckle-filter: + type: boolean + inputBinding: + prefix: --with-speckle-filter + resampling-method: + type: + type: enum + symbols: + - BILINEAR_INTERPOLATION + - BICUBIC_INTERPOLATION + inputBinding: + prefix: --resampling-method + cdse-user: + type: string? + inputBinding: + prefix: --cdse-user + cdse-password: + type: string? + inputBinding: + prefix: --cdse-password + dry-run: + type: boolean + inputBinding: + prefix: --dry-run + + outputs: + ost_ard: + outputBinding: + glob: . + type: Directory diff --git a/resources/snap.varfile b/resources/snap.varfile new file mode 100644 index 00000000..fd6688cc --- /dev/null +++ b/resources/snap.varfile @@ -0,0 +1,17 @@ +# install4j response file for ESA SNAP 8.0 +# headless S1TBX +deleteAllSnapEngineDir$Boolean=false +deleteOnlySnapDesktopDir$Boolean=true +executeLauncherWithPythonAction$Boolean=false +forcePython$Boolean=false +pythonExecutable=/usr/bin/python +sys.adminRights$Boolean=true +sys.component.RSTB$Boolean=true +sys.component.S1TBX$Boolean=true +sys.component.S2TBX$Boolean=false +sys.component.S3TBX$Boolean=false +sys.component.SNAP$Boolean=false +sys.installationDir=/home/ost/programs/snap +sys.languageId=en +sys.programGroupDisabled$Boolean=false +sys.symlinkDir=/usr/local/bin diff --git a/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json b/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json new file mode 100644 index 00000000..d0b78ea5 --- /dev/null +++ b/tests/resources/input_dir/S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json @@ -0,0 +1,376 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + 
"https://stac-extensions.github.io/processing/v1.0.0/schema.json", + "https://stac-extensions.github.io/projection/v1.0.0/schema.json", + "https://stac-extensions.github.io/sar/v1.0.0/schema.json", + "https://stac-extensions.github.io/sat/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 10.710614, + 42.433846 + ], + [ + 11.100653, + 40.9328 + ], + [ + 14.189954, + 41.339962 + ], + [ + 13.873197, + 42.840092 + ], + [ + 10.710614, + 42.433846 + ] + ] + ] + }, + "properties": { + "datetime": "2024-11-13T17:06:07.293807Z", + "start_datetime": "2024-11-13T17:06:07.293807Z", + "end_datetime": "2024-11-13T17:06:32.292309Z", + "created": "2024-11-13T17:44:24Z", + "updated": "2024-12-06T11:59:58.0279394Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1", + "mission": "sentinel-1", + "instruments": [ + "c-sar" + ], + "sensor_type": "radar", + "gsd": 22.0, + "sat:orbit_state": "ascending", + "sat:anx_datetime": "2024-11-13T16:55:00.601119Z", + "sat:absolute_orbit": 56539, + "sat:relative_orbit": 117, + "sat:platform_international_designator": "2014-016A", + "processing:level": "L1", + "processing:lineage": "GRD Post Processing", + "processing:facility": "Copernicus Ground Segment", + "processing:software": { + "Sentinel-1 IPF": "003.80" + }, + "proj:epsg": null, + "title": "SENTINEL-1A GRD VV/VH 117 2024-11-13 17:06:07", + "sar:instrument_mode": "IW", + "sar:frequency_band": "C", + "sar:polarizations": [ + "VV", + "VH" + ], + "sar:product_type": "GRD", + "providers": [ + { + "name": "ESA/EC (Copernicus)", + "description": "The Sentinel-1 mission comprises a constellation of two polar-orbiting satellites, operating day and night performing C-band synthetic aperture radar imaging, enabling them to acquire imagery regardless of the weather.", + "roles": [ + "producer", + "processor", + "licensor" + ], + "url": 
"https://sentinel.esa.int/web/sentinel/missions/sentinel-1" + } + ] + }, + "bbox": [ + 10.710614, + 40.9328, + 14.189954, + 42.840092 + ], + "assets": { + "amplitude-vh-iw-002": { + "type": "image/x.geotiff", + "roles": [ + "amplitude", + "data" + ], + "title": "IW VH Amplitude pixel values", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.tiff", + "file:size": 878507832, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.tiff", + "sar:polarizations": [ + "VH" + ], + "proj:epsg": null, + "proj:shape": [ + 26329, + 16678 + ] + }, + "annotation-vh-iw-002": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "Annotation VH IW 002", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 1842824, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "amplitude-vv-iw-001": { + "type": "image/x.geotiff", + "roles": [ + "amplitude", + "data" + ], + "title": "IW VV Amplitude pixel values", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.tiff", + "file:size": 878507832, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/measurement/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.tiff", + "sar:polarizations": [ + "VV" + ], + "proj:epsg": null, + "proj:shape": [ + 26329, + 16678 + ] + }, + "annotation-vv-iw-001": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "Annotation VV IW 001", + 
"href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 1842846, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "calibration-vh-iw-002": { + "type": "text/xml", + "roles": [ + "calibration", + "data" + ], + "title": "Calibration VH IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 1035193, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "calibration-vv-iw-001": { + "type": "text/xml", + "roles": [ + "calibration", + "data" + ], + "title": "Calibration VV IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 1035193, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/calibration-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "noise-vh-iw-002": { + "type": "text/xml", + "roles": [ + "data", + "noise" + ], + "title": "Noise VH IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "file:size": 432157, + "filename": 
"S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vh-20241113t170607-20241113t170632-056539-06eea8-002.xml", + "sar:polarizations": [ + "VH" + ] + }, + "noise-vv-iw-001": { + "type": "text/xml", + "roles": [ + "data", + "noise" + ], + "title": "Noise VV IW", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "file:size": 432157, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/annotation/calibration/noise-s1a-iw-grd-vv-20241113t170607-20241113t170632-056539-06eea8-001.xml", + "sar:polarizations": [ + "VV" + ] + }, + "support-s1-level-1-calibration": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-calibration", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-calibration.xsd", + "file:size": 6427, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-calibration.xsd" + }, + "support-s1-level-1-measurement": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-measurement", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-measurement.xsd", + "file:size": 471, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-measurement.xsd" + }, + "support-s1-level-1-noise": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-noise", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-noise.xsd", + "file:size": 7290, + "filename": 
"S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-noise.xsd" + }, + "support-s1-level-1-product": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-product", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-product.xsd", + "file:size": 149999, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-product.xsd" + }, + "support-s1-level-1-quicklook": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-quicklook", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-quicklook.xsd", + "file:size": 469, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-quicklook.xsd" + }, + "support-s1-level-1-rfi": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-level-1-rfi", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-rfi.xsd", + "file:size": 16595, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-level-1-rfi.xsd" + }, + "support-s1-map-overlay": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-map-overlay", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-map-overlay.xsd", + "file:size": 450, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-map-overlay.xsd" + }, + "support-s1-object-types": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-object-types", + "href": 
"S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-object-types.xsd", + "file:size": 62179, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-object-types.xsd" + }, + "support-s1-product-preview": { + "type": "text/xml", + "roles": [ + "metadata", + "support" + ], + "title": "Support file support-s1-product-preview", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-product-preview.xsd", + "file:size": 440, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/support/s1-product-preview.xsd" + }, + "preview-logo": { + "type": "image/png", + "roles": [ + "data", + "logo" + ], + "title": "Preview file preview-logo", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/icons/logo.png", + "file:size": 95280, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/icons/logo.png" + }, + "preview-map-overlay": { + "type": "application/vnd.google-earth.kml+xml", + "roles": [ + "data", + "kml" + ], + "title": "Preview file preview-map-overlay", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/map-overlay.kml", + "file:size": 1018, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/map-overlay.kml" + }, + "preview-product-preview": { + "type": "application/octet-stream", + "roles": [ + "data" + ], + "title": "Preview file preview-product-preview", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/product-preview.html", + "file:size": 3673, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/product-preview.html" + }, + "preview-quick-look": { + "type": "image/png", + "roles": [ + "data", + "thumbnail" + ], + "title": "Preview file preview-quick-look", + "href": 
"S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/quick-look.png", + "file:size": 301151, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/quick-look.png" + }, + "preview-thumbnail": { + "type": "application/octet-stream", + "roles": [ + "data" + ], + "title": "Preview file preview-thumbnail", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/thumbnail.png", + "file:size": 100892, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/preview/thumbnail.png" + }, + "manifest": { + "type": "text/xml", + "roles": [ + "metadata" + ], + "title": "SAFE Manifest", + "href": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/manifest.safe", + "file:size": 24532, + "filename": "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE/manifest.safe" + } + }, + "links": [] +} \ No newline at end of file diff --git a/tests/resources/input_dir/catalog.json b/tests/resources/input_dir/catalog.json new file mode 100644 index 00000000..611b8637 --- /dev/null +++ b/tests/resources/input_dir/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_056539_06EEA8_B145.SAFE.json" + } + ] +} diff --git a/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json b/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json new file mode 100644 index 00000000..299f090f --- /dev/null +++ b/tests/resources/input_zip/S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json @@ -0,0 +1,24 @@ +{ + "stac_version": "1.1.0", + "type": "Feature", + "id": 
"S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB", + "geometry": null, + "properties": { + "datetime": "2022-10-04T16:43:16Z", + "platform": "sentinel-1a", + "constellation": "sentinel-1" + }, + "assets": { + "GRD": { + "type": "application/zip", + "roles": [ "data" ], + "href": "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.zip" + } + }, + "links": [ + { + "rel": "parent", + "href": "../catalog.json" + } + ] +} diff --git a/tests/resources/input_zip/catalog.json b/tests/resources/input_zip/catalog.json new file mode 100644 index 00000000..bfae5594 --- /dev/null +++ b/tests/resources/input_zip/catalog.json @@ -0,0 +1,13 @@ +{ + "stac_version": "1.1.0", + "id": "catalog", + "type": "Catalog", + "description": "Root catalog", + "links": [ + { + "type": "application/json", + "rel": "item", + "href": "./S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_045295_056A44_13CB.json" + } + ] +} diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 00000000..b9ab5675 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,23 @@ +import os +from pathlib import Path + +# OST insists on knowing the path to gpt, but we don't need it for these tests. +os.environ["GPT_PATH"] = os.environ.get("GPT_PATH", "dummy") + +from ost.app import preprocessing + + +def test_get_input_path_from_stac_zip(): + cat_path = Path(__file__).parent / "resources" / "input_zip" + assert preprocessing.get_input_path_from_stac(str(cat_path)) == \ + str(cat_path / + "S1A_IW_GRDH_1SDV_20221004T164316_20221004T164341_" + "045295_056A44_13CB.zip") + + +def test_get_input_path_from_stac_dir(): + cat_path = Path(__file__).parent / "resources" / "input_dir" + assert preprocessing.get_input_path_from_stac(str(cat_path)) == \ + str(cat_path / + "S1A_IW_GRDH_1SDV_20241113T170607_20241113T170632_" + "056539_06EEA8_B145.SAFE")