From 08730bf794a42eda33fa68e87da5b07a37c4280d Mon Sep 17 00:00:00 2001 From: Dmytro Lukash Date: Thu, 19 Jan 2023 15:40:17 +0200 Subject: [PATCH 1/4] 21618/added overlap_with_geometry method to use in components --- sentinel2download/overlap.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sentinel2download/overlap.py b/sentinel2download/overlap.py index 5fb74ec..3882fe6 100644 --- a/sentinel2download/overlap.py +++ b/sentinel2download/overlap.py @@ -120,3 +120,33 @@ def _zone_number(lat, lon): return 32600 + zone else: return 32700 + zone + + def overlap_with_geometry(self, *, limit: float = 0.001) -> Optional[gp.GeoDataFrame]: + """ + Find unique tiles that intersects given aoi, area. + The same as overlap, but with geometry. + :param limit: float, min intersection area in km2 + :return: GeoDataFrame: Tile names and it's geometry in epsg:4326 + """ + + logger.info(f"Start finding overlapping tiles") + + grid, epsg = self._intersect(limit) + + aoi = self.aoi + overlap_tiles = list() + for row in grid.itertuples(): + start_area = aoi.geometry[0].area + aoi.geometry[0] = aoi.geometry[0].difference(row.geometry) + if start_area != aoi.geometry[0].area: + overlap_tiles.append(dict(Name=row.Name, geometry=row.geometry)) + + if not overlap_tiles: + return + + tiles = gp.GeoDataFrame(overlap_tiles, crs=epsg) + tiles = tiles.to_crs(self.crs) + + logger.info(f"Found {len(tiles)} tiles: {', '.join(sorted(tiles.Name))}") + return tiles + \ No newline at end of file From bca7ff06c72411a4ada3886131f1b030cbd94e0b Mon Sep 17 00:00:00 2001 From: Dmytro Lukash Date: Thu, 19 Jan 2023 15:55:09 +0200 Subject: [PATCH 2/4] 21618/method annotation small fix --- sentinel2download/overlap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentinel2download/overlap.py b/sentinel2download/overlap.py index 3882fe6..f851098 100644 --- a/sentinel2download/overlap.py +++ b/sentinel2download/overlap.py @@ -126,7 +126,7 @@ def overlap_with_geometry(self, *, limit: float = 0.001) -> Optional[gp.GeoDataF Find unique tiles that intersects given aoi, area. The same as overlap, but with geometry. :param limit: float, min intersection area in km2 - :return: GeoDataFrame: Tile names and it's geometry in epsg:4326 + :return: GeoDataFrame: Tile names (Name column) and it's geometry in epsg:4326 """ logger.info(f"Start finding overlapping tiles") From 05b47a549c6c2a7bed3372c6ca581b2791c2d727 Mon Sep 17 00:00:00 2001 From: "i.chepets" Date: Tue, 4 Jul 2023 11:47:00 +0300 Subject: [PATCH 3/4] Added possibility to retrieve the latest available image --- sentinel2download/downloader.py | 36 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/sentinel2download/downloader.py b/sentinel2download/downloader.py index ed44e5f..6cdd300 100644 --- a/sentinel2download/downloader.py +++ b/sentinel2download/downloader.py @@ -45,14 +45,17 @@ def __init__(self, api_key: str, verbose: bool = False): self.bucket = self.client.get_bucket('gcp-public-data-sentinel-2') self.metadata_suffix = 'MTD_TL.xml' - def _filter_by_dates(self, safe_prefixes) -> List[str]: + @staticmethod + def _prefix_to_date(prefix, date_pattern=r"_(\d+)T\d+_", date_format='%Y%m%d') -> datetime: # acquired date: 20200812T113607 - date_pattern = r"_(\d+)T\d+_" + search = re.search(date_pattern, prefix) + date = search.group(1) + return datetime.strptime(date, date_format) + + def _filter_by_dates(self, safe_prefixes) -> List[str]: filtered = list() for safe_prefix in safe_prefixes: - search = re.search(date_pattern, safe_prefix) - date = search.group(1) - date = datetime.strptime(date, '%Y%m%d') + date = self._prefix_to_date(safe_prefix) if date in self.date_range: filtered.append(safe_prefix) return filtered @@ -161,9 +164,22 @@ def _get_blobs_to_load(self, prefixes): return blobs_to_load + def _get_latest_available_date_prefix(self, safe_prefixes) -> List[str]: + prefixes_date_descend = sorted(safe_prefixes, reverse=True) + if self.full_download: + return prefixes_date_descend[:1] + for prefix in prefixes_date_descend: + blobs_to_load = self._get_blobs_to_load([prefix]) + if blobs_to_load: + return [prefix] + return [] + def _get_filtered_prefixes(self, tile_prefix) -> List[str]: # filter store items by base prefix, ex: tiles/36/U/YA/ safe_prefixes = self._get_safe_prefixes(tile_prefix) + if self.latest_date: + # get latest available image .SAFE path + return self._get_latest_available_date_prefix(safe_prefixes) # filter .SAFE paths by date range filtered_prefixes = self._filter_by_dates(safe_prefixes) return filtered_prefixes @@ -202,7 +218,7 @@ def _download_blobs_mult(self, blobs) -> List[Tuple[str, str]]: return results def _setup(self, product_type, tiles, start_date, end_date, bands, - constraints, output_dir, cores, full_download): + constraints, output_dir, cores, full_download, latest_date): if product_type not in PRODUCT_TYPE: raise ValueError(f"Provide proper Sentinel2 type: {PRODUCT_TYPE}") self.product_type = product_type @@ -234,6 +250,7 @@ def _setup(self, product_type, tiles, start_date, end_date, bands, self.output_dir = output_dir self.cores = cores self.full_download = full_download + self.latest_date = latest_date def download(self, product_type: str, @@ -245,7 +262,8 @@ def download(self, constraints: dict = CONSTRAINTS, output_dir: str = './sentinel2imagery', cores: int = 5, - full_download: bool = False) -> Optional[List]: + full_download: bool = False, + latest_date: bool = False) -> Optional[List]: """ :param product_type: str, "L2A" or "L1C" Sentinel2 products :param tiles: list, tiles to load (ex: {36UYA, 36UYB}) @@ -258,11 +276,13 @@ def download(self, :param output_dir: str, path to loading dir, default: './sentinel2imagery' :param cores: int, number of cores, default: 5 :param full_download: bool, option for full download of Sentinel-2 .SAFE folder, default: False + :param latest_date: bool, option for retrieving last available image, default: False :return: [tuple, None], tuples (save_path, blob_name), if save_path is None, the blob not loaded or None if nothing to load """ - self._setup(product_type, tiles, start_date, end_date, bands, constraints, output_dir, cores, full_download) + self._setup(product_type, tiles, start_date, end_date, bands, constraints, output_dir, cores, full_download, + latest_date) logger.info("Start downloading...") start_time = time.time() From 94e012c8135135838df4aa611022ce47606835ba Mon Sep 17 00:00:00 2001 From: "i.chepets" Date: Tue, 4 Jul 2023 12:51:06 +0300 Subject: [PATCH 4/4] Updated sorting function for prefixes --- sentinel2download/downloader.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sentinel2download/downloader.py b/sentinel2download/downloader.py index 6cdd300..bd1594a 100644 --- a/sentinel2download/downloader.py +++ b/sentinel2download/downloader.py @@ -23,6 +23,7 @@ CONSTRAINTS = MappingProxyType({'CLOUDY_PIXEL_PERCENTAGE': 100.0, 'NODATA_PIXEL_PERCENTAGE': 100.0, }) FOLDER_SUFFIX = "_$folder$" +DATE_PATTERN = r"_(\d+)T\d+_" class Sentinel2Downloader: @@ -46,7 +47,7 @@ def __init__(self, api_key: str, verbose: bool = False): self.metadata_suffix = 'MTD_TL.xml' @staticmethod - def _prefix_to_date(prefix, date_pattern=r"_(\d+)T\d+_", date_format='%Y%m%d') -> datetime: + def _prefix_to_date(prefix, date_pattern=DATE_PATTERN, date_format='%Y%m%d') -> datetime: # acquired date: 20200812T113607 search = re.search(date_pattern, prefix) date = search.group(1) @@ -164,8 +165,15 @@ def _get_blobs_to_load(self, prefixes): return blobs_to_load + @staticmethod + def extract_date(s): + match = re.search(DATE_PATTERN, s) + if match: + return match.group(1) + return '' + def _get_latest_available_date_prefix(self, safe_prefixes) -> List[str]: - prefixes_date_descend = sorted(safe_prefixes, reverse=True) + prefixes_date_descend = sorted(safe_prefixes, key=lambda s: self.extract_date(s), reverse=True) if self.full_download: return prefixes_date_descend[:1] for prefix in prefixes_date_descend: