From beedebf2bd7ce1d8827daf5d8b3f76625f3b5edf Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Tue, 15 Apr 2025 10:53:03 -0400 Subject: [PATCH] solve reading online zipped shapefile in US TIGER and GeoFile Reader base_url = "ftp://ftp2.census.gov/geo/tiger/TIGER2020PL/STATE/" def urlread(url: str) in GeoFile reader --- knime_extension/src/nodes/io.py | 19 ++++++++++++++++++- knime_extension/src/nodes/opendata.py | 3 ++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/knime_extension/src/nodes/io.py b/knime_extension/src/nodes/io.py index 755d5c9f..6c448e87 100644 --- a/knime_extension/src/nodes/io.py +++ b/knime_extension/src/nodes/io.py @@ -102,6 +102,23 @@ def execute(self, exec_context: knext.ExecutionContext): 0.4, "Reading file (This might take a while without progress changes)" ) + import geopandas as gpd + + def urlread(url: str) -> gpd.GeoDataFrame: + try: + gdf = gpd.read_file(url) + return gdf + except Exception as e1: + if url.startswith("http") and url.endswith(".zip"): + try: + vsizip_url = "/vsizip/vsicurl/" + url + gdf = gpd.read_file(vsizip_url) + return gdf + except Exception as e2: + raise RuntimeError(f"Error:{e2}") + else: + raise RuntimeError(f"Error:{e1}") + if self.data_url.lower().endswith(".kml"): import fiona @@ -132,7 +149,7 @@ def execute(self, exec_context: knext.ExecutionContext): ): gdf = gp.read_parquet(self.data_url) else: - gdf = gp.read_file(self.data_url) + gdf = urlread(self.data_url) if "" in gdf.columns: gdf = gdf.drop(columns="") diff --git a/knime_extension/src/nodes/opendata.py b/knime_extension/src/nodes/opendata.py index d7032034..a757dc82 100644 --- a/knime_extension/src/nodes/opendata.py +++ b/knime_extension/src/nodes/opendata.py @@ -210,7 +210,7 @@ def execute(self, exec_context: knext.ExecutionContext): County5Fips = self.StateFips + self.County3Fips - base_url = "https://www2.census.gov/geo/tiger/TIGER2020PL/STATE/" + base_url = "ftp://ftp2.census.gov/geo/tiger/TIGER2020PL/STATE/" if self.StateFips != 
self.County3Fips and self.County3Fips != "*": data_url = f"{base_url}{Statepath}/{County5Fips}/tl_2020_{County5Fips}_{self.geofile}.zip" @@ -219,6 +219,7 @@ def execute(self, exec_context: knext.ExecutionContext): if self.geofile == "roads": self.geofile = "prisecroads" data_url = f"{base_url}{Statepath}/{County5Fips}/tl_2020_{County5Fips}_{self.geofile}.zip" + # data_url = "/vsizip/vsicurl/" + data_url gdf = gp.read_file(data_url) gdf.reset_index(drop=True, inplace=True) return knext.Table.from_pandas(gdf)