diff --git a/.gitignore b/.gitignore
index 5a9235d..d39508c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,11 @@
+test_status.json
+
 \.idea/
 \.vscode
 clearcut_detection_backend/clearcuts/migrations/
 clearcut_detection_backend/model/predicted/
 logs.txt
-data/
+clearcut_detection_backend/data/
 logs/
 venv/
 _install/
@@ -80,4 +82,4 @@ credentials.json
 # External files
 landcover.zip
 docker-compose-django-debug.yml
-dyman_settings.py
\ No newline at end of file
+dyman_settings.py
diff --git a/clearcut_detection_backend/docker-compose-test.yml b/clearcut_detection_backend/docker-compose-test.yml
new file mode 100644
index 0000000..3b8a73e
--- /dev/null
+++ b/clearcut_detection_backend/docker-compose-test.yml
@@ -0,0 +1,31 @@
+version: '3'
+services:
+  model:
+    build:
+      context: ./model
+      dockerfile: model.Dockerfile
+    image: clearcut_detection/model
+    env_file:
+      - ./model/model.env
+    volumes:
+      - ./model/:/model
+      - ./data/:/model/data
+    working_dir: /model
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+    ports:
+      - '5000:5000'
+    command: /bin/bash -c "python3 app.py"
+
+  test:
+    build:
+      context: ./
+      dockerfile: test.Dockerfile
+    image: clearcut_detection/test
+    volumes:
+      - ./:/code
+    working_dir: /code
+    command: /bin/bash -c "pip install -r ./test/requirements.txt && python test.py"
+
+volumes:
+  data:
diff --git a/clearcut_detection_backend/model/model.Dockerfile b/clearcut_detection_backend/model/model.Dockerfile
index 3e6aeb3..f1f7f96 100644
--- a/clearcut_detection_backend/model/model.Dockerfile
+++ b/clearcut_detection_backend/model/model.Dockerfile
@@ -1,11 +1,13 @@
-FROM python:3.6
+FROM nvidia/cuda:10.0-cudnn7-runtime-ubuntu18.04
+
+RUN apt-get update && apt-get install -y python3-pip
 
 RUN mkdir /model
 WORKDIR /model
 
-ADD requirements.txt /model
+COPY requirements.txt /model
 
-RUN pip install -r requirements.txt
+RUN pip3 install -r requirements.txt
 
-ADD . /model/
+COPY . /model/
diff --git a/clearcut_detection_backend/model/predict_raster.py b/clearcut_detection_backend/model/predict_raster.py
index d5c162b..bc7a8e8 100644
--- a/clearcut_detection_backend/model/predict_raster.py
+++ b/clearcut_detection_backend/model/predict_raster.py
@@ -31,6 +31,8 @@ import warnings
 
 warnings.filterwarnings('ignore')
 
+os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
+
 logging.basicConfig(format='%(asctime)s %(message)s')
 
 def load_model(network, model_weights_path, channels, neighbours):
diff --git a/clearcut_detection_backend/model/requirements.txt b/clearcut_detection_backend/model/requirements.txt
index 5e736fd..a17305a 100644
--- a/clearcut_detection_backend/model/requirements.txt
+++ b/clearcut_detection_backend/model/requirements.txt
@@ -1,6 +1,6 @@
 catalyst==19.5
 Flask==1.1.1
-geopandas==0.5.1
+geopandas==0.8.1
 google-api-python-client==1.8.0
 google-auth-httplib2==0.0.3
 google-auth-oauthlib==0.4.1
@@ -14,3 +14,4 @@ tqdm==4.19.9
 oauth2client==4.1.3
 Pillow==6.2.2
 PyYAML==5.1.1
+opencv-python-headless==4.1.0.25
diff --git a/clearcut_detection_backend/test.Dockerfile b/clearcut_detection_backend/test.Dockerfile
new file mode 100644
index 0000000..da5c155
--- /dev/null
+++ b/clearcut_detection_backend/test.Dockerfile
@@ -0,0 +1,20 @@
+FROM python:3.6
+
+RUN mkdir /test
+WORKDIR /test
+
+COPY ./test/requirements.txt /test
+RUN pip install -r requirements.txt
+
+RUN apt-get update -y && apt-get install -y \
+    software-properties-common
+
+RUN add-apt-repository -r ppa:ubuntugis/ppa && apt-get update
+RUN apt-get update
+RUN apt-get install gdal-bin -y
+RUN apt-get install libgdal-dev -y
+ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
+ENV C_INCLUDE_PATH=/usr/include/gdal
+RUN pip install GDAL==$(gdal-config --version | awk -F'[.]' '{print $1"."$2}')
+
+ADD . /test/
\ No newline at end of file
diff --git a/clearcut_detection_backend/test.py b/clearcut_detection_backend/test.py
new file mode 100644
index 0000000..3ea5116
--- /dev/null
+++ b/clearcut_detection_backend/test.py
@@ -0,0 +1,5 @@
+from test.predict import model_predict
+from test.evaluation import model_evaluate
+
+results, test_tile_path = model_predict()
+model_evaluate(results, test_tile_path)
diff --git a/clearcut_detection_backend/test/__init__.py b/clearcut_detection_backend/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/clearcut_detection_backend/test/evaluation.py b/clearcut_detection_backend/test/evaluation.py
new file mode 100644
index 0000000..acc8ea5
--- /dev/null
+++ b/clearcut_detection_backend/test/evaluation.py
@@ -0,0 +1,141 @@
+import os
+import json
+import yaml
+
+import numpy as np
+import pandas as pd
+import rasterio
+import geopandas
+
+from tqdm import tqdm
+from rasterio import features
+
+from test.polygon_metrics import f1_score_evaluation, polygonize
+from test.utils import GOLD_DICE, GOLD_F1SCORE, GOLD_IOU, SUCCESS_THRESHOLD, IOU_THRESHOLD
+from test.test_data_prepare import get_gt_polygons
+
+def dice_coef(y_true, y_pred, eps=1e-7):
+    y_true_f = y_true.flatten()
+    y_pred_f = y_pred.flatten()
+    intersection = np.sum(y_true_f * y_pred_f)
+    return (2. * intersection + eps) / (np.sum(y_true_f) + np.sum(y_pred_f) + eps)
+
+
+def iou(y_true, y_pred, smooth=1.0):
+    y_true_f = y_true.flatten()
+    y_pred_f = y_pred.flatten()
+    intersection = np.sum(y_true_f * y_pred_f)
+    return (1. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) - intersection + smooth)
+
+
+def confusion_matrix(y_true, y_pred):
+    mm, mn, nm, nn = 0, 0, 0, 0
+    M, N = 0, 0
+    for i in range(len(y_true)):
+        if (y_true.iloc[i] == y_pred.iloc[i]):
+            if (y_true.iloc[i] == 1):
+                M += 1
+                mm += 1
+            else:
+                N += 1
+                nn += 1
+        else:
+            if (y_true.iloc[i] == 1):
+                M += 1
+                mn += 1
+            else:
+                N += 1
+                nm += 1
+    return mm, mn, nm, nn, M, N
+
+
+def get_raster_masks(reference_tif, model_result):
+    raster = {}
+    with rasterio.open(reference_tif) as src:
+        filenames = {}
+        filenames['mask'] = get_gt_polygons()
+        filenames['predicted'] = os.path.join('data', model_result[0].get('polygons'))
+        for name in filenames:
+            gt_polygons = geopandas.read_file(filenames[name])
+            gt_polygons = gt_polygons.to_crs(src.crs)
+            raster[name] = features.rasterize(shapes=gt_polygons['geometry'],
+                                              out_shape=(src.height, src.width),
+                                              transform=src.transform,
+                                              default_value=1)
+
+    return raster
+
+def load_config():
+    with open('./model/predict_config.yml', 'r') as config:
+        cfg = yaml.load(config, Loader=yaml.SafeLoader)
+
+    models = cfg['models']
+    save_path = cfg['prediction']['save_path']
+    threshold = cfg['prediction']['threshold']
+    input_size = cfg['prediction']['input_size']
+
+    return models, save_path, threshold, input_size
+
+
+def evaluate(model_result, test_tile_path):
+    raster = get_raster_masks(test_tile_path['current'], model_result)
+    _, _, _, size = load_config()
+
+    res_cols = ['name', 'dice_score', 'iou_score', 'pixel_amount']
+    test_df_results = pd.DataFrame(columns=res_cols)
+    dices, ious = [], []
+    test_polys, truth_polys = [], []
+    for i in tqdm(range(raster['mask'].shape[0] // size)):
+        for j in range(raster['mask'].shape[1] // size):
+            instance_name = f'{i}_{j}'
+            mask = raster['mask'][i*size : (i+1)*size, j*size : (j+1)*size]
+            if mask.sum() > 0:
+                prediction = raster['predicted'][i*size : (i+1)*size, j*size : (j+1)*size]
+                test_polys.append(polygonize(prediction.astype(np.uint8)))
+                truth_polys.append(polygonize(mask.astype(np.uint8)))
+
+                dice_score = dice_coef(mask, prediction)
+                iou_score = iou(mask, prediction, smooth=1.0)
+
+                dices.append(dice_score)
+                ious.append(iou_score)
+
+                pixel_amount = mask.sum()
+
+                test_df_results = test_df_results.append({'name': instance_name,
+                    'dice_score': dice_score, 'iou_score': iou_score, 'pixel_amount': pixel_amount}, ignore_index=True)
+
+    log = pd.DataFrame(columns=['f1_score', 'threshold', 'TP', 'FP', 'FN'])
+    for threshold in np.arange(0.1, 1, 0.1):
+        F1score, true_pos_count, false_pos_count, false_neg_count, total_count = f1_score_evaluation(test_polys, truth_polys, threshold=threshold)
+        log = log.append({'f1_score': round(F1score, 4),
+                          'threshold': round(threshold, 2),
+                          'TP': int(true_pos_count),
+                          'FP': int(false_pos_count),
+                          'FN': int(false_neg_count)}, ignore_index=True)
+
+    return log, np.average(dices), np.average(ious)
+
+
+
+def model_evaluate(model_result, test_tile_path):
+    f1_score_test, dice, iou = evaluate(model_result, test_tile_path)
+
+    f1_score_test = f1_score_test[f1_score_test['threshold'] == IOU_THRESHOLD]['f1_score'].to_numpy()
+    f1_score_standard = GOLD_F1SCORE
+
+    result = {}
+    result['f1_score'] = float(f1_score_standard - f1_score_test[0])
+    result['dice_score'] = float(GOLD_DICE - dice)
+    result['iou_score'] = float(GOLD_IOU - iou)
+    result['status'] = (result['f1_score'] < SUCCESS_THRESHOLD) \
+        and (result['dice_score'] < SUCCESS_THRESHOLD) \
+        and (result['iou_score'] < SUCCESS_THRESHOLD)
+
+    if result['status']:
+        result['status'] = 'success'
+    else:
+        result['status'] = 'failed'
+
+    with open('test_status.json', 'w') as outfile:
+        json.dump(result, outfile)
diff --git a/clearcut_detection_backend/test/polygon_metrics.py b/clearcut_detection_backend/test/polygon_metrics.py
new file mode 100644
index 0000000..8ad76bf
--- /dev/null
+++ b/clearcut_detection_backend/test/polygon_metrics.py
@@ -0,0 +1,117 @@
+import os
+import cv2
+import numpy as np
+
+from scipy import ndimage as ndi
+from shapely.geometry import Polygon
+from skimage.segmentation import watershed
+from skimage.feature import peak_local_max
+
+import matplotlib.pyplot as plt
+
+def watershed_segmentation(image):
+    distance = ndi.distance_transform_edt(image)
+    local_maxi = peak_local_max(distance, indices=False, footprint=np.ones((3, 3)),
+                                labels=image)
+    markers = ndi.label(local_maxi)[0]
+    labels = watershed(-distance, markers, mask=image)
+    return labels, distance
+
+def polygonize(raster_array, meta=None, transform=False):
+    contours, hierarchy = cv2.findContours(raster_array.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    polygons = []
+    for i in range(len(contours)):
+        c = contours[i]
+        n_s = (c.shape[0], c.shape[2])
+        if n_s[0] > 2:
+            if transform:
+                polys = [tuple(i) * meta['transform'] for i in c.reshape(n_s)]
+            else:
+                polys = [tuple(i) for i in c.reshape(n_s)]
+            polygons.append(Polygon(polys))
+    return polygons
+
+def iou_poly(test_poly, truth_poly):
+    iou_score = 0
+    intersection_result = test_poly.intersection(truth_poly)
+    if not intersection_result.is_valid:
+        intersection_result = intersection_result.buffer(0)
+    if not intersection_result.is_empty:
+        intersection_area = intersection_result.area
+        union_area = test_poly.union(truth_poly).area
+        iou_score = intersection_area / union_area
+    else:
+        iou_score = 0
+    return iou_score
+
+
+def score(test_polys, truth_polys, threshold=0.5):
+    true_pos_count = 0
+    true_neg_count = 0
+    false_pos_count = 0
+    false_neg_count = 0
+    total_count = 0
+    for test_poly, truth_poly in zip(test_polys, truth_polys):
+        if len(test_poly) == 0 and len(truth_poly) == 0:
+            true_neg_count += 1
+            total_count += 1
+        elif len(test_poly) == 0 and len(truth_poly) > 0:
+            false_neg_count += 1
+            total_count += 1
+        elif len(test_poly) > 0 and len(truth_poly) == 0:
+            false_pos_count += 1
+            total_count += 1
+        else:
+            intersected = []
+
+            for test_p in test_poly:
+                for truth_p in truth_poly:
+                    if not test_p.is_valid:
+                        test_p = test_p.buffer(0)
+                    if not truth_p.is_valid:
+                        truth_p = truth_p.buffer(0)
+                    if test_p.intersection(truth_p).is_valid:
+                        if not test_p.intersection(truth_p).is_empty:
+                            intersected.append([test_p, truth_p])
+
+            if len(intersected) < len(test_poly):
+                false_pos_count += (len(test_poly) - len(intersected))
+                total_count += (len(test_poly) - len(intersected))
+            if len(intersected) < len(truth_poly):
+                false_neg_count += (len(truth_poly) - len(intersected))
+                total_count += (len(truth_poly) - len(intersected))
+            for inter in intersected:
+                iou_score = iou_poly(inter[0], inter[1])
+
+                if iou_score >= threshold:
+                    true_pos_count += 1
+                    total_count += 1
+                else:
+                    false_pos_count += 1
+                    total_count += 1
+    return true_pos_count, false_pos_count, false_neg_count, total_count
+
+
+def f1_score_evaluation(test_polys, truth_polys, threshold=0.5):
+    if len(truth_polys) == 0 and len(test_polys) != 0:
+        true_pos_count = 0
+        false_pos_count = len(test_polys)
+        false_neg_count = 0
+        total_count = len(test_polys)
+    elif len(truth_polys) == 0 and len(test_polys) == 0:
+        true_pos_count = len(test_polys)
+        false_pos_count = 0
+        false_neg_count = 0
+        total_count = len(test_polys)
+    else:
+        true_pos_count, false_pos_count, false_neg_count, total_count = score(test_polys, truth_polys,
+                                                                              threshold=threshold
+                                                                              )
+
+    if (true_pos_count > 0):
+        precision = float(true_pos_count) / (float(true_pos_count) + float(false_pos_count))
+        recall = float(true_pos_count) / (float(true_pos_count) + float(false_neg_count))
+        F1score = 2.0 * precision * recall / (precision + recall)
+    else:
+        F1score = 0
+    return F1score, true_pos_count, false_pos_count, false_neg_count, total_count
diff --git a/clearcut_detection_backend/test/predict.py b/clearcut_detection_backend/test/predict.py
new file mode 100644
index 0000000..c7367cf
--- /dev/null
+++ b/clearcut_detection_backend/test/predict.py
@@ -0,0 +1,63 @@
+import json
+import os
+import logging
+import requests
+import yaml
+from concurrent.futures import ThreadPoolExecutor
+
+from test.settings import MODEL_TIFFS_DIR, MAX_WORKERS, DATA_DIR
+from test.utils import area_tile_set_test, path_exists_or_create
+from test.utils import download_file_from_google_drive
+from test.utils import gdrive_ids
+
+model_call_config = 'model_call_config.yml'
+logger = logging.getLogger('model_call')
+
+def file_download():
+    tile = area_tile_set_test.pop()
+
+    path_exists_or_create(f'{MODEL_TIFFS_DIR}/{tile}')
+    test_tile_path = {}
+
+    test_tile_path['current'] = path_exists_or_create(f'{MODEL_TIFFS_DIR}/{tile}/{tile}_0/') + f'{tile}_0.tif'
+    test_tile_path['previous'] = path_exists_or_create(f'{MODEL_TIFFS_DIR}/{tile}/{tile}_1/') + f'{tile}_1.tif'
+
+    test_tile_path['cloud_current'] = path_exists_or_create(f'{MODEL_TIFFS_DIR}/{tile}/{tile}_0/') + 'clouds.tiff'
+    test_tile_path['cloud_previous'] = path_exists_or_create(f'{MODEL_TIFFS_DIR}/{tile}/{tile}_1/') + 'clouds.tiff'
+
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        for order in test_tile_path.keys():
+            if not os.path.exists(test_tile_path[order]):
+                executor.submit(download_file_from_google_drive, gdrive_ids[order], test_tile_path[order])
+
+    return test_tile_path
+
+
+def model_predict():
+    test_tile_path = file_download()
+    tif_path = "/".join(test_tile_path['current'].split('/')[:4])
+    logger.info(f'raster_prediction {tif_path}')
+    results = raster_prediction(tif_path)
+    logger.info(f'results:\n{results}')
+    results_path = os.path.join(DATA_DIR, results[0].get('polygons'))
+    return results, test_tile_path
+
+# TODO: add docstring
+def raster_prediction(tif_path):
+    with open(model_call_config, 'r') as config:
+        cfg = yaml.load(config, Loader=yaml.SafeLoader)
+    model_api_cfg = cfg["model-api"]
+    api_endpoint = "http://{host}:{port}/{endpoint}".format(
+        host=model_api_cfg["host"],
+        port=model_api_cfg["port"],
+        endpoint=model_api_cfg["endpoint"]
+    )
+    data = {"image_path": tif_path}
+    logger.info(f'sending request to model API for\n{tif_path}')
+    try:
+        response = requests.post(url=api_endpoint, json=data)
+        result = response.text
+        datastore = json.loads(result)
+        return datastore
+    except Exception:
+        logger.error('Error\n\n', exc_info=True)
diff --git a/clearcut_detection_backend/test/requirements.txt b/clearcut_detection_backend/test/requirements.txt
new file mode 100644
index 0000000..c626c53
--- /dev/null
+++ b/clearcut_detection_backend/test/requirements.txt
@@ -0,0 +1,88 @@
+affine==2.3.0
+argcomplete==1.11.1
+attrs==19.3.0
+boto==2.49.0
+boto3==1.14.11
+botocore==1.17.11
+cachetools==3.1.1
+certifi==2020.6.20
+cffi==1.14.0
+chardet==3.0.4
+click==7.1.2
+click-plugins==1.1.1
+cligj==0.5.0
+crcmod==1.7
+cryptography==2.9.2
+cycler==0.10.0
+Cython==0.29.20
+decorator==4.4.2
+docutils==0.15.2
+fasteners==0.15
+Fiona==1.8.13.post1
+gcs-oauth2-boto-plugin==2.6
+geopandas==0.5.1
+google-api-core==1.14.3
+google-api-python-client==1.7.4
+google-apitools==0.5.31
+google-auth==1.7.1
+google-auth-httplib2==0.0.3
+google-cloud-core==1.0.3
+google-cloud-storage==1.23.0
+google-reauth==0.1.0
+google-resumable-media==0.5.0
+googleapis-common-protos==1.6.0
+gsutil==4.47
+httplib2==0.18.1
+idna==2.9
+imageio==2.5.0
+importlib-metadata==1.6.1
+jmespath==0.10.0
+kiwisolver==1.2.0
+matplotlib==3.2.2
+mercantile==1.1.5
+mock==2.0.0
+monotonic==1.5
+munch==2.5.0
+networkx==2.4
+numpy==1.16.4
+oauth2client==4.1.3
+opencv-python==4.2.0.34
+pandas==0.24.2
+pbr==5.4.5
+Pillow==6.1.0
+protobuf==3.12.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+pyOpenSSL==19.1.0
+pyparsing==2.4.7
+pyproj==2.2.1
+python-dateutil==2.8.1
+pytz==2020.1
+pyu2f==0.1.4
+PyWavelets==1.1.1
+PyYAML==5.1.1
+rasterio==1.1.2
+requests==2.24.0
+retry-decorator==1.1.1
+rio-mbtiles==1.4.2
+rsa==4.0
+s3transfer==0.3.3
+scikit-image==0.17.2
+scipy==1.5.0
+sentinelhub==2.5.3
+Shapely==1.6.4.post2
+six==1.12.0
+snuggs==1.4.7
+SocksiPy-branch==1.1
+tifffile==2020.6.3
+tqdm==4.19.9
+uritemplate==3.0.1
+urllib3==1.25.9
+utm==0.5.0
+zipp==3.1.0
+
+# catalyst==19.5
+# segmentation-models-pytorch==0.1.0
+# torch==1.4.0
+# torchvision==0.5.0
diff --git a/clearcut_detection_backend/test/scripts/Dockerfile b/clearcut_detection_backend/test/scripts/Dockerfile
new file mode 100644
index 0000000..39b3193
--- /dev/null
+++ b/clearcut_detection_backend/test/scripts/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.6
+
+RUN mkdir /test
+WORKDIR /test
+
+COPY requirements.txt /test
+RUN pip install -r requirements.txt
+
+RUN apt-get update -y && apt-get install -y \
+    software-properties-common
+
+RUN add-apt-repository -r ppa:ubuntugis/ppa && apt-get update
+RUN apt-get update
+RUN apt-get install gdal-bin -y
+RUN apt-get install libgdal-dev -y
+ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
+ENV C_INCLUDE_PATH=/usr/include/gdal
+RUN pip install GDAL==$(gdal-config --version | awk -F'[.]' '{print $1"."$2}')
+
+
+ADD . /test
diff --git a/clearcut_detection_backend/test/scripts/download.py b/clearcut_detection_backend/test/scripts/download.py
new file mode 100644
index 0000000..25bc581
--- /dev/null
+++ b/clearcut_detection_backend/test/scripts/download.py
@@ -0,0 +1,287 @@
+import os
+import re
+import datetime
+import logging
+from enum import Enum
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from google.cloud import storage
+from google.cloud.exceptions import NotFound
+from xml.dom import minidom
+from xml.etree.ElementTree import ParseError
+
+from utils import area_tile_set_test, bands_to_download, date_current_test, date_previous_test
+import settings
+
+logger = logging.getLogger('sentinel')
+
+
+class TileNameError(Exception):
+    def __init__(self, tile_name):
+        self.message = f'{tile_name} is not a valid tile name'
+        Exception.__init__(self, self.message)
+
+    def __str__(self):
+        return self.message
+
+
+class Bands(Enum):
+    TCI = 'TCI'
+    B04 = 'B04'
+    B08 = 'B08'
+    B8A = 'B8A'
+    B11 = 'B11'
+    B12 = 'B12'
+
+
+class SentinelDownload:
+
+    def __init__(self):
+        settings.DOWNLOADED_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
+        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './key.json'
+        self.sequential_dates_count = settings.MAXIMUM_DATES_STORE_FOR_TILE
+        self.area_tile_set = area_tile_set_test
+        self.bands_to_download = bands_to_download
+        self.base_uri = 'gs://gcp-public-data-sentinel-2'
+        self.bucket_name = 'gcp-public-data-sentinel-2'
+        self.prefix = 'L2/tiles'
+        self.tiles_and_uris_dict = {tile_name: self.get_tile_uri(tile_name) for tile_name in self.area_tile_set}
+        self.storage_client = storage.Client()
+        self.storage_bucket = self.get_storage_bucket()
+
+        logger.info(f'area tile set for download: {self.area_tile_set}')
+        logger.info(f'bands to download:{self.bands_to_download}')
+        print(bands_to_download)
+
+    def get_storage_bucket(self):
+        try:
+            return self.storage_client.get_bucket(self.bucket_name)
+        except NotFound:
+            logger.error('Error\n\n', exc_info=True)
+
+    def process_download(self):
+        """
+        Requests metadata file to define if update is needed for tile
+        Launches multithread download
+        """
+        tiles_to_update = self.request_google_cloud_storage_for_latest_acquisition(self.tiles_and_uris_dict)
+        self.launch_download_pool(tiles_to_update)
+
+    @staticmethod
+    def get_tile_uri(tile_name):
+        """
+        Reads config and extracts the names of the tiles needed by the application
+        Converts the tile name to part of the URI in format [UTM_ZONE]/[LATITUDE_BAND]/[GRID_SQUARE]
+        Creates URI for full tile path
+        :tile_name: str
+        :return: str: tile_uri
+        """
+        try:
+            match = re.search(r'\b\d+', tile_name)
+            utm_zone = int(match[0]) if match else None
+            if not utm_zone or 1 > utm_zone or utm_zone > 60:
+                raise TileNameError(tile_name)
+            string = tile_name.replace(match[0], '')
+
+            match = re.search(r'\b[C-Z]', string)
+            latitude_band = match[0] if match else None
+            if not latitude_band:
+                raise TileNameError(tile_name)
+            string = string.replace(match[0], '', 1)
+
+            match = re.search(r'\b[A-Z][A-Z]\b', string)
+            grid_square = match[0] if match else None
+            if not grid_square:
+                logger.info(string)
+                raise TileNameError(tile_name)
+            return f'{utm_zone}/{latitude_band}/{grid_square}'
+        except TileNameError:
+            logger.error('Error\n\n', exc_info=True)
+
+    def launch_download_pool(self, tiles_to_update):
+        """
+        For each tile starts own thread processing
+        :param tiles_to_update:
+        :return:
+        """
+        with ThreadPoolExecutor(max_workers=settings.MAX_WORKERS) as executor:
+            future_list = []
+            for tile_name, tile_path in tiles_to_update.items():
+                future = executor.submit(self.download_images_from_tiles, tile_name, tile_path)
+                future_list.append(future)
+
+            for future in as_completed(future_list):
+                if not future.result():
+                    exit(1)
+                else:
+                    logger.info(f'images for {future.result()[0]} were downloaded')
+
+    def download_images_from_tiles(self, tile_name, tile_path):
+        """
+        Iterates over folders to fetch tile images folder
+        Downloads band specified in config
+        :param tile_name:
+        :param tile_path:
+        :return:
+        """
+        tile_prefix = f'{tile_path}/IMG_DATA/R20m/'
+        blobs = self.storage_bucket.list_blobs(prefix=tile_prefix)
+
+        for blob in blobs:
+            band, download_needed = self.file_need_to_be_downloaded(blob.name)
+            if download_needed:
+                filename = settings.DOWNLOADED_IMAGES_DIR / f'{tile_name}_{band}.jp2'
+                self.download_file_from_storage(blob, filename)
+
+        tile_prefix = f'{tile_path}/IMG_DATA/R10m/'
+        blobs = self.storage_bucket.list_blobs(prefix=tile_prefix)
+
+        for blob in blobs:
+            band, download_needed = self.file_need_to_be_downloaded(blob.name)
+            if download_needed:
+                filename = settings.DOWNLOADED_IMAGES_DIR / f'{tile_name}_{band}.jp2'
+                self.download_file_from_storage(blob, filename)
+
+
+        tile_prefix = f'{tile_path}/QI_DATA/'
+        blobs = self.storage_bucket.list_blobs(prefix=tile_prefix)
+
+        endswith = 'MSK_CLDPRB_20m.jp2'
+        for blob in blobs:
+            if blob.name.endswith(endswith):
+                filename = settings.DOWNLOADED_IMAGES_DIR / f"{tile_name}_{blob.name.split('/')[-1]}"
+                self.download_file_from_storage(blob, filename)
+
+        return tile_name, tile_path
+
+    def file_need_to_be_downloaded(self, name):
+        """
+        Checks if blob is eligible for download through formats specified in config
+        :param name:
+        :return:
+        """
+        for band in self.bands_to_download:
+            if name.endswith(f'_{band}_10m.jp2') or name.endswith(f'_{band}_20m.jp2'):
+                return band, True
+        return None, False
+
+    def request_google_cloud_storage_for_latest_acquisition(self, tiles_path):
+        """
+        Iterates over tile sets and picks latest tile dataset.
+        Defines if tile is needed to be updated.
+        :param tiles_path:
+        :return:
+        """
+        tiles_to_be_downloaded = {}
+        metadata_file = 'MTD_TL.xml'
+
+        for tile_name, tile_path in tiles_path.items():
+            logger.info(f'TILE NAME: {tile_name}')
+            delimiter = '/'
+            tile_uri = f'L2/tiles/{tile_path}/'
+
+            blobs = self.storage_client.list_blobs(self.storage_bucket, prefix=tile_uri, delimiter=delimiter)
+            blobs._next_page()
+
+            prefixes = list(blobs.prefixes)
+            if not prefixes:
+                raise NotFound(f'no such tile_uri: {tile_uri}')
+            prefixes.sort(key=self.get_folders_date, reverse=True)
+            granule_id_list = [prefix for prefix in prefixes if date_current_test in prefix or date_previous_test in prefix]
+
+            granule_num = 0
+            for granule_id in granule_id_list:
+                if granule_num < self.sequential_dates_count:
+                    prefix = f'{granule_id}GRANULE/'
+                    blobs = self.storage_client.list_blobs(self.storage_bucket, prefix=prefix, delimiter=delimiter)
+                    blobs._next_page()
+
+                    nested_granule_id_list = list(blobs.prefixes)
+                    if not nested_granule_id_list:
+                        raise NotFound(f'no such prefix: {prefix}')
+
+                    nested_granule_id = nested_granule_id_list[0]
+                    updated_tile_uri = nested_granule_id[:-1] if nested_granule_id.endswith('/') else nested_granule_id
+
+                    filename = settings.DOWNLOADED_IMAGES_DIR / f'{tile_name}_{metadata_file}'
+                    print(granule_num, filename)
+                    blob = self.storage_bucket.get_blob(f'{updated_tile_uri}/{metadata_file}')
+                    if not blob:
+                        raise NotFound(f'not found {updated_tile_uri}/{metadata_file}')
+
+                    update_needed = self.define_if_tile_update_needed(blob, f'{tile_name}_{granule_num}', filename)
+                    if update_needed:
+                        logger.info(f'Tile {tile_name}_{granule_num} will be downloaded from {updated_tile_uri}')
+                        tiles_to_be_downloaded[f'{tile_name}_{granule_num}'] = f'{updated_tile_uri}'
+                    os.remove(filename)
+                    granule_num += 1
+                else:
+                    break
+        print(tiles_to_be_downloaded)
+        return tiles_to_be_downloaded
+
+    def define_if_tile_update_needed(self, blob, tile_name, filename) -> bool:
+        """
+        Checks hash of the metadata file for latest image and information from DB
+        Downloads metadata file if hash is not equal
+        Checks cloud coverage value from metadata file; if it is lower than the one from settings - allows to download images
+        :param blob:
+        :param tile_name:
+        :param filename:
+        :return:
+        """
+        filename = str(filename)
+        self.download_file_from_storage(blob, filename)
+        nodata_pixel_value = self.define_xml_node_value(filename, 'NODATA_PIXEL_PERCENTAGE')
+        # print('====== NO DATA PIXEL VALUE ======')
+        # print(nodata_pixel_value)
+        if nodata_pixel_value >= settings.MAXIMUM_EMPTY_PIXEL_PERCENTAGE:
+            return False
+        cloud_coverage_value = self.define_xml_node_value(filename, 'CLOUDY_PIXEL_PERCENTAGE')
+        # print('====== CLOUD COVERAGE VALUE ======')
+        # print(cloud_coverage_value)
+        if cloud_coverage_value <= settings.MAXIMUM_CLOUD_PERCENTAGE_ALLOWED:
+            return True
+        else:
+            return False
+
+    def get_folders_date(self, path):
+        match = re.search(r'_\d{8}T\d{6}', path)
+        if match:
+            return match[0]
+        else:
+            raise ValueError(f'No date in {self.bucket_name} for {path}')
+
+    @staticmethod
+    def download_file_from_storage(blob, filename):
+        """
+        Downloads blob to local storage
+        :param blob:
+        :param filename:
+        :return:
+        """
+        with open(filename, 'wb') as new_file:
+            blob.download_to_file(new_file)
+
+    @staticmethod
+    def define_xml_node_value(xml_file_name, node):
+        """
+        Parsing XML file for passed node name
+        :param xml_file_name:
+        :param node:
+        :return:
+        """
+        try:
+            xml_dom = minidom.parse(xml_file_name)
+            xml_node = xml_dom.getElementsByTagName(node)
+            xml_node_value = xml_node[0].firstChild.data
+            return float(xml_node_value)
+        except FileNotFoundError:
+            logger.error(f'No such file: {xml_file_name}\n\n', exc_info=True)
+        except (ParseError, IndexError):
+            logger.error(f'no such node ({node}) in the {xml_file_name}\n\n', exc_info=True)
+        return None
+
+
+sentinel_downloader = SentinelDownload()
+sentinel_downloader.process_download()
diff --git a/clearcut_detection_backend/test/scripts/preprocess.py b/clearcut_detection_backend/test/scripts/preprocess.py
new file mode 100644
index 0000000..8a5e623
--- /dev/null
+++ b/clearcut_detection_backend/test/scripts/preprocess.py
@@ -0,0 +1,117 @@
+import os
+from os.path import join
+
+import imageio
+import rasterio
+import numpy as np
+
+from configparser import ConfigParser
+from tqdm import tqdm
+
+from utils import path_exists_or_create, bands_to_download
+from settings import DOWNLOADED_IMAGES_DIR, MODEL_TIFFS_DIR
+
+ROOT = '.SAFE'
+
+def get_ndvi(b4_file, b8_file, ndvi_file):
+    os.system(
+        f'gdal_calc.py -A {b4_file} -B {b8_file} \
+        --outfile={ndvi_file} \
+        --calc="(B-A)/(A+B+0.001)" --type=Float32 --quiet'
+    )
+
+def to_tiff(input_jp2_file, output_tiff_file, output_type='Float32'):
+    os.system(
+        f'gdal_translate -ot {output_type} \
+        {input_jp2_file} {output_tiff_file}'
+    )
+
+def scale_img(img_file, output_file=None, min_value=0, max_value=255, output_type='Byte'):
+    with rasterio.open(img_file) as src:
+        img = src.read(1)
+        img = np.nan_to_num(img)
+        mean_ = img.mean()
+        std_ = img.std()
+        min_ = max(img.min(), mean_ - 2 * std_)
+        max_ = min(img.max(), mean_ + 2 * std_)
+
+        output_file = os.path.splitext(img_file)[0] if output_file is None else output_file
+
+        os.system(
+            f'gdal_translate -ot {output_type} \
+            -scale {min_} {max_} {min_value} {max_value} \
+            {img_file} {output_file}'
+        )
+
+def prepare_tiff(filename):
+    save_path = path_exists_or_create(join(MODEL_TIFFS_DIR, filename.split('_')[0], f"{filename}"))
+    output_tiffs = {}
+    bands_to_convert = [band for band in bands_to_download]
+
+    if 'TCI' in bands_to_download:
+        output_tiffs['tiff_rgb_name'] = join(save_path, f'{filename}_TCI.tif')
+        to_tiff(join(DOWNLOADED_IMAGES_DIR, f'{filename}_TCI.jp2'),
+                join(save_path, f'{filename}_TCI.tif'), 'Byte')
+        bands_to_convert.remove('TCI')
+
+    for band in bands_to_convert:
+        output_tiffs[f'tiff_{band}_name'] = join(save_path, f'{filename}_{band}.tif')
+        to_tiff(join(DOWNLOADED_IMAGES_DIR, f'{filename}_{band}.jp2'),
+                output_tiffs[f'tiff_{band}_name'])
+
+
+    if 'B04' in bands_to_download and 'B08' in bands_to_download:
+        output_tiffs['tiff_ndvi_name'] = join(save_path, f'{filename}_ndvi.tif')
+        print('\nndvi band is processing...')
+        get_ndvi(output_tiffs.get('tiff_B04_name'),
+                 output_tiffs.get('tiff_B08_name'),
+                 output_tiffs.get('tiff_ndvi_name'))
+        bands_to_convert.append('ndvi')
+
+    if 'B11' in bands_to_download and 'B8A' in bands_to_download:
+        output_tiffs['tiff_ndmi_name'] = join(save_path, f'{filename}_ndmi.tif')
+        print('\nndmi band is processing...')
+        get_ndvi(output_tiffs.get('tiff_B11_name'),
+                 output_tiffs.get('tiff_B8A_name'),
+                 output_tiffs.get('tiff_ndmi_name'))
+        bands_to_convert.append('ndmi')
+
+    for band in bands_to_convert:
+        output_tiffs[f'scaled_{band}_name'] = f"{output_tiffs[f'tiff_{band}_name']}_scaled.tif"
+        scale_img(output_tiffs[f'tiff_{band}_name'], output_tiffs[f'scaled_{band}_name'])
+
+    tiff_output_name = join(save_path, f'{filename}_merged.tiff')
+
+    if 'B04' in bands_to_download:
+        bands_to_convert.remove('B04')
+    # if 'TCI' in bands_to_download:
+    #     bands_to_convert = [output_tiffs['tiff_rgb_name']] + bands_to_convert
+
+    files_to_merge = [output_tiffs.get(f'scaled_{band}_name') for band in bands_to_convert]
+    files_to_merge = ([output_tiffs['tiff_rgb_name']] if 'TCI' in bands_to_download else []) + files_to_merge
+    merged_files = " ".join(files_to_merge)
+
+    print(merged_files)
+    os.system(
+        f"gdal_merge.py -separate -o {tiff_output_name} {merged_files}"
+    )
+
+    to_tiff(join(DOWNLOADED_IMAGES_DIR, f'{filename}_MSK_CLDPRB_20m.jp2'),
+            f'{join(save_path, "clouds.tiff")}')
+
+    for item in os.listdir(save_path):
+        if item.endswith('.tif'):
+            os.remove(join(save_path, item))
+
+
+
+def preprocess():
+    filenames = ["_".join(file.split('_')[:2]) for file in os.listdir(DOWNLOADED_IMAGES_DIR)]
+    filenames = set(filenames)
+    for filename in tqdm(filenames):
+        print(filename)
+        prepare_tiff(filename)
+        print('==============')
+
+
+preprocess()
\ No newline at end of file
diff --git a/clearcut_detection_backend/test/settings.py b/clearcut_detection_backend/test/settings.py
new file mode 100644
index 0000000..6b646c4
--- /dev/null
+++ b/clearcut_detection_backend/test/settings.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+MAXIMUM_CLOUD_PERCENTAGE_ALLOWED = 20.0
+MAXIMUM_EMPTY_PIXEL_PERCENTAGE = 5.0
+MAXIMUM_DATES_STORE_FOR_TILE = 2
+MAX_WORKERS = 6
+
+DATA_DIR = Path('./data/test')
+DOWNLOADED_IMAGES_DIR = DATA_DIR / 'source_images'
+MODEL_TIFFS_DIR = DATA_DIR / 'model_tiffs'
+
+METRIC_CONFIG = './test/metrics.ini'
+TEST_CONFIG = './test/test_config.ini'
diff --git a/clearcut_detection_backend/test/test_config.ini b/clearcut_detection_backend/test/test_config.ini
new file mode 100644
index 0000000..9068d01
--- /dev/null
+++ b/clearcut_detection_backend/test/test_config.ini
@@ -0,0 +1,11 @@
+[config]
+AREA_TILE_SET = 36UYA
+BANDS_TO_DOWNLOAD = TCI B04 B08 B8A B11 B12
+DATE_CURRENT = 20190427
+DATE_PREVIOUS = 20190402
+TEST_POLYGONS_URL = https://raw.githubusercontent.com/vldkhramtsov/ClearcutServiceDatasets/master/36UYA_Spring_time-dependent.geojson
+METRICS_URL = https://raw.githubusercontent.com/vldkhramtsov/ClearcutServiceDatasets/master/metrics.ini
+TEST_TILE_CURRENT_GDRIVE_ID = 1uGVCkVnA5Zdp7IlHM2S6STjHbEyf3L3C
+TEST_TILE_PREVIOUS_GDRIVE_ID = 1ITe552pKCkN8sqv-qOCd04pJrGymcomi
+TEST_CLOUDS_CURRENT_GDRIVE_ID = 1mfg9KpcUoh6Q2-hOJXIXuL4i2HHcGcrf
+TEST_CLOUDS_PREVIOUS_GDRIVE_ID = 1yBxZGyzZDjRKXCz6dqK95TnSysGxBxf0
diff --git a/clearcut_detection_backend/test/test_data_prepare.py b/clearcut_detection_backend/test/test_data_prepare.py
new file mode 100644
index 0000000..e99fab5
--- /dev/null
+++ b/clearcut_detection_backend/test/test_data_prepare.py
@@ -0,0 +1,30 @@
+import os
+import pandas as pd
+import geopandas
+
+from datetime import datetime
+from shapely.ops import unary_union
+from test.utils import DATE_CURRENT, DATE_PREVIOUS, TEST_POLYGONS
+from test.settings import DATA_DIR
+
+
+def save_polygons(polygons, crs, save_path, filename):
+    if len(polygons) == 0:
+        return
+    polygons = geopandas.GeoDataFrame({'geometry': polygons}, crs=crs)
+    polygons.to_file(os.path.join(save_path, f'{filename}.geojson'), driver='GeoJSON')
+    return os.path.join(save_path, f'{filename}.geojson')
+
+
+def prepare_testfile():
+    test = geopandas.read_file(TEST_POLYGONS)
+    test['img_date'] = pd.to_datetime(test['img_date'], format='%Y-%m-%d')
+    test = test[(test['img_date'] > DATE_PREVIOUS) & (test['img_date'] <= DATE_CURRENT)]
+    return test
+
+
+def get_gt_polygons():
+    test = prepare_testfile()
+    clearcuts = unary_union(test['geometry'].buffer(1e-5)).buffer(-1e-5)
+    clearcuts = list(clearcuts) if clearcuts.geom_type == 'MultiPolygon' else [clearcuts]
+    return save_polygons(clearcuts, test.crs, DATA_DIR, 'test_clearcuts')
diff --git a/clearcut_detection_backend/test/utils.py b/clearcut_detection_backend/test/utils.py
new file mode 100644
index 0000000..0aeb961
--- /dev/null
+++ b/clearcut_detection_backend/test/utils.py
@@ -0,0 +1,83 @@
+import os
+import requests
+import urllib.request
+
+from configparser import ConfigParser
+from datetime import datetime
+
+from test.settings import DATA_DIR, METRIC_CONFIG, TEST_CONFIG
+
+def download_file_from_google_drive(id, destination):
+    # https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
+    URL = "https://docs.google.com/uc?export=download"
+
+    session = requests.Session()
+
+    response = session.get(URL, params={'id': id}, stream=True)
+    token = get_confirm_token(response)
+
+    if token:
+        params = {'id': id, 'confirm': token}
+        response = session.get(URL, params=params, stream=True)
+
+    save_response_content(response, destination)
+
+def get_confirm_token(response):
+    for key, value in response.cookies.items():
+        if key.startswith('download_warning'):
+            return value
+
+    return None
+
+def save_response_content(response, destination):
+    CHUNK_SIZE = 32768
+
+    with open(destination, "wb") as f:
+        for chunk in response.iter_content(CHUNK_SIZE):
+            if chunk:  # filter out keep-alive new chunks
+                f.write(chunk)
+
+
+def path_exists_or_create(path):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    return path
+
+def download_dataset(file_url, save_path):
+    urllib.request.urlretrieve(file_url, save_path)
+    return save_path
+
+path_exists_or_create(DATA_DIR)
+
+config_test = ConfigParser(allow_no_value=True)
+config_test.read(TEST_CONFIG)
+
+area_tile_set_test = set(config_test.get('config', 'AREA_TILE_SET').split())
+bands_to_download = config_test.get('config', 'BANDS_TO_DOWNLOAD').split()
+date_current_test = config_test.get('config', 'DATE_CURRENT')
+date_previous_test = config_test.get('config', 'DATE_PREVIOUS')
+test_polys_url = config_test.get('config', 'TEST_POLYGONS_URL')
+metrics_url = config_test.get('config', 'METRICS_URL')
+
+gdrive_ids = {}
+gdrive_ids['current'] = config_test.get('config', 'TEST_TILE_CURRENT_GDRIVE_ID')
+gdrive_ids['previous'] = config_test.get('config', 'TEST_TILE_PREVIOUS_GDRIVE_ID')
+gdrive_ids['cloud_current'] = config_test.get('config', 'TEST_CLOUDS_CURRENT_GDRIVE_ID')
+gdrive_ids['cloud_previous'] = config_test.get('config', 'TEST_CLOUDS_PREVIOUS_GDRIVE_ID')
+
+
+TEST_POLYGONS = download_dataset(test_polys_url, f'{DATA_DIR}/test_clearcuts.geojson')
+DATE_CURRENT = datetime.strptime(date_current_test, '%Y%m%d')
+DATE_PREVIOUS = datetime.strptime(date_previous_test, '%Y%m%d')
+
+# Target metrics values
+download_dataset(metrics_url, METRIC_CONFIG)
+
+config_metrics = ConfigParser(allow_no_value=True)
+config_metrics.read(METRIC_CONFIG)
+
+GOLD_DICE = float(config_metrics.get('metric', 'GOLD_DICE'))
+GOLD_IOU = float(config_metrics.get('metric', 'GOLD_IOU'))
+IOU_THRESHOLD = float(config_metrics.get('metric', 'IOU_THRESHOLD'))
+GOLD_F1SCORE = float(config_metrics.get('metric', 'GOLD_F1SCORE'))
+SUCCESS_THRESHOLD = float(config_metrics.get('metric', 'SUCCESS_THRESHOLD'))
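Usage sketch for running this test suite locally (assumes docker-compose is installed and that `model_call_config.yml` points at the `model` service; neither is part of this patch):

    docker-compose -f clearcut_detection_backend/docker-compose-test.yml up --build

`test/utils.py` above reads five keys from the `metrics.ini` fetched from METRICS_URL. A hypothetical example of that file, with placeholder values only (the real gold targets live at the URL in `test_config.ini`):

    [metric]
    GOLD_DICE = 0.55
    GOLD_IOU = 0.40
    GOLD_F1SCORE = 0.70
    IOU_THRESHOLD = 0.5
    SUCCESS_THRESHOLD = 0.05

On completion, `test.py` writes `test_status.json` containing the deltas of F1/Dice/IoU against these gold values and an overall `success`/`failed` status, which is why `test_status.json` is added to `.gitignore` at the top of the patch.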