diff --git a/Dockerfile b/Dockerfile index 7a90951..172c9a8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim-buster +FROM python:3.10-slim-bookworm COPY --from=ghcr.io/astral-sh/uv:0.5.29 /uv /uvx /bin/ diff --git a/config.py b/config.py index 7442139..656fe98 100644 --- a/config.py +++ b/config.py @@ -10,6 +10,12 @@ class Settings(BaseSettings): # EMDAT GAUL GAUL_FILE_PATH: str = "./geodata-prep/geodata/gaul.gpkg" GAUL_DOWNLOAD_URL: str = "https://github.com/IFRCGo/geocoding-service/releases/download/v1.0.0/gaul.gpkg" + # Super simplified World Administrative Boundaries + SUPER_SIMPLIFIED_WAB_FILE_PATH: str = "./geodata-prep/geodata/super_simple.wab.fgb" + SUPER_SIMPLIFIED_WAB_DOWNLOAD_URL: str = "https://github.com/IFRCGo/geocoding-service/releases/download/v1.0.0/wab.fgb" + # Super simplified EMDAT GAUL + SUPER_SIMPLIFIED_GAUL_FILE_PATH: str = "./geodata-prep/geodata/super_simple.gaul.gpkg" + SUPER_SIMPLIFIED_GAUL_DOWNLOAD_URL: str = "https://github.com/IFRCGo/geocoding-service/releases/download/v1.0.0/gaul.gpkg" settings = Settings() diff --git a/docker-compose.yml b/docker-compose.yml index b4d0c31..6ce6d8c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,4 +11,6 @@ services: environment: WAB_FILE_PATH: /geodata/simple.wab.fgb GAUL_FILE_PATH: /geodata/simple.gaul.gpkg + SUPER_SIMPLIFIED_WAB_FILE_PATH: /geodata/super_simple.wab.fgb + SUPER_SIMPLIFIED_GAUL_FILE_PATH: /geodata/super_simple.gaul.gpkg command: /bin/sh -c "uvicorn service:app --host 0.0.0.0 --port 8001 --reload --workers 1" diff --git a/geodata-prep/docker-compose.yml b/geodata-prep/docker-compose.yml index 7688430..7f91908 100644 --- a/geodata-prep/docker-compose.yml +++ b/geodata-prep/docker-compose.yml @@ -9,4 +9,5 @@ services: environment: DATA_DIR: /geodata TOLERANCE: 0.001 + HIGH_TOLERANCE: 0.2 command: /bin/sh /code/prepare.sh diff --git a/geodata-prep/prepare.sh b/geodata-prep/prepare.sh index 490f9fb..a6e18fb 100755 --- a/geodata-prep/prepare.sh +++ b/geodata-prep/prepare.sh 
@@ -4,6 +4,7 @@ set -e set -x echo "Simplification tolerance is $TOLERANCE" +echo "Super Simplification tolerance is $HIGH_TOLERANCE" echo "Data directory is at $DATA_DIR" # PROCESS EMDAT GAUL @@ -13,6 +14,7 @@ ZIP_NAME="gaul.zip" ITEM_NAME="gaul2014_2015.gpkg" FILE_NAME="gaul.gpkg" SIMPLIFIED_FILE_NAME="simple.$FILE_NAME" +SUPER_SIMPLIFIED_FILE_NAME="super_simple.$FILE_NAME" # Initialize mkdir -p "$TMP_DIR" @@ -25,11 +27,13 @@ unzip -u "$TMP_DIR/$ZIP_NAME" "$ITEM_NAME" -d "$TMP_DIR" # Simplify GAUL file while preserving the topology ogr2ogr "$TMP_DIR/$SIMPLIFIED_FILE_NAME" "$TMP_DIR/$ITEM_NAME" -simplify $TOLERANCE +ogr2ogr "$TMP_DIR/$SUPER_SIMPLIFIED_FILE_NAME" "$TMP_DIR/$ITEM_NAME" -simplify $HIGH_TOLERANCE # QT_QPA_PLATFORM=offscreen qgis_process plugins enable grassprovider # QT_QPA_PLATFORM=offscreen qgis_process run grass7:v.generalize --input="$TMP_DIR/$ITEM_NAME" --output="$TMP_DIR/$SIMPLIFIED_FILE_NAME" --threshold=0.01 --type=1 --method=0 --error="$TMP_DIR/errors.qgis.log" # Cleanup mv "$TMP_DIR/$SIMPLIFIED_FILE_NAME" "$DATA_DIR" +mv "$TMP_DIR/$SUPER_SIMPLIFIED_FILE_NAME" "$DATA_DIR" # PROCESS WAL @@ -39,6 +43,7 @@ DATA_DIR=/geodata TMP_DIR="$DATA_DIR/tmp_wab" FILE_NAME="wab.fgb" SIMPLIFIED_FILE_NAME="simple.$FILE_NAME" +SUPER_SIMPLIFIED_FILE_NAME="super_simple.$FILE_NAME" # Initialize mkdir -p "$TMP_DIR" @@ -48,6 +53,8 @@ curl --no-progress-meter --output "$TMP_DIR/$FILE_NAME" "https://public.opendata # Simplify GAUL file while preserving the topology ogr2ogr "$TMP_DIR/$SIMPLIFIED_FILE_NAME" "$TMP_DIR/$FILE_NAME" -simplify $TOLERANCE +ogr2ogr "$TMP_DIR/$SUPER_SIMPLIFIED_FILE_NAME" "$TMP_DIR/$FILE_NAME" -simplify $HIGH_TOLERANCE # Cleanup mv "$TMP_DIR/$SIMPLIFIED_FILE_NAME" "$DATA_DIR" +mv "$TMP_DIR/$SUPER_SIMPLIFIED_FILE_NAME" "$DATA_DIR" diff --git a/init.py b/init.py index 653b20c..fea419c 100644 --- a/init.py +++ b/init.py @@ -13,9 +13,11 @@ class SharedMem(typing.TypedDict): geocoder: FastGeocoder | None + super_simplified_geocoder: FastGeocoder | 
None -shared_mem: SharedMem = {"geocoder": None} +shared_mem: SharedMem = {"geocoder": None, "super_simplified_geocoder": None} + logger = logging.getLogger(__name__) @@ -65,12 +67,27 @@ async def lifespan(app: FastAPI): file_path=settings.GAUL_FILE_PATH, ) + _download_geodata( + name="SUPER_SIMPLIFIED_WAB", + url_path=settings.SUPER_SIMPLIFIED_WAB_DOWNLOAD_URL, + file_path=settings.SUPER_SIMPLIFIED_WAB_FILE_PATH, + ) + + _download_geodata( + name="SUPER_SIMPLIFIED_GAUL", + url_path=settings.SUPER_SIMPLIFIED_GAUL_DOWNLOAD_URL, + file_path=settings.SUPER_SIMPLIFIED_GAUL_FILE_PATH, + ) + logger.info("Initializing geocoder") geocoder = FastGeocoder( settings.WAB_FILE_PATH, settings.GAUL_FILE_PATH, ) + super_simplified_geocoder = FastGeocoder(settings.SUPER_SIMPLIFIED_WAB_FILE_PATH, settings.SUPER_SIMPLIFIED_GAUL_FILE_PATH) shared_mem["geocoder"] = geocoder + shared_mem["super_simplified_geocoder"] = super_simplified_geocoder + logger.info("Initialization for geocoder complete.") yield diff --git a/service.py b/service.py index 0737294..68a1d78 100644 --- a/service.py +++ b/service.py @@ -60,12 +60,11 @@ async def get_iso3(lat: float, lng: float) -> geocoding.Country: @app.get("/country/geometry") async def get_country_geometry( - country_name: str | None = None, - iso3: str | None = None, + country_name: str | None = None, iso3: str | None = None, simplified: bool = False ) -> geocoding.AdminGeometry: """Get the country geometry based on country name or iso3""" try: - geocoder = shared_mem["geocoder"] + geocoder = shared_mem["geocoder"] if not simplified else shared_mem["super_simplified_geocoder"] if not geocoder: raise Exception("Geocoder is not initialized") if iso3: @@ -87,12 +86,11 @@ async def get_country_geometry( @app.get("/admin2/geometries") async def get_admin2_geometries( - admin1_codes: list[int] = Query(default=[]), - admin2_codes: list[int] = Query(default=[]), + admin1_codes: list[int] = Query(default=[]), admin2_codes: list[int] = Query(default=[]), 
simplified: bool = False ) -> geocoding.AdminGeometry: """Get the admin 2 geometries based on admin 1 codes or admin 2 codes""" try: - geocoder = shared_mem["geocoder"] + geocoder = shared_mem["geocoder"] if not simplified else shared_mem["super_simplified_geocoder"] if not geocoder: raise Exception("Geocoder is not initialized") if admin1_codes or admin2_codes: