From 84270f35780559cda30bc48176f0eaa4ff014473 Mon Sep 17 00:00:00 2001 From: mcarans Date: Thu, 18 Dec 2025 10:53:53 +1300 Subject: [PATCH] When creating or updating a resource that doesn't have an id, if no dataset parameter is supplied but the resource contains a package_id, then that id will be used to load a dataset. The resource will then be assigned to that dataset and compared to the resources in that dataset. If a match is found, the resource will be given the corresponding id and that resource on HDX will be overwritten. --- requirements.txt | 2 +- src/hdx/data/resource.py | 44 ++++++++++++++++------- tests/hdx/data/test_resource.py | 62 ++++++++++++++++++++++++++++----- 3 files changed, 85 insertions(+), 23 deletions(-) diff --git a/requirements.txt b/requirements.txt index c830b28..2b3b81b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -259,7 +259,7 @@ rpds-py==0.30.0 # referencing rsa==4.9.1 # via google-auth -ruamel-yaml==0.18.16 +ruamel-yaml==0.18.17 # via hdx-python-utilities ruamel-yaml-clib==0.2.15 # via ruamel-yaml diff --git a/src/hdx/data/resource.py b/src/hdx/data/resource.py index 387d182..5fea664 100755 --- a/src/hdx/data/resource.py +++ b/src/hdx/data/resource.py @@ -477,21 +477,39 @@ def _get_resource_id(self, **kwargs: Any) -> Optional[str]: dataset = kwargs.get("dataset") if dataset: dataset_id = dataset.get("id") + if not dataset_id: + dataset_name = dataset.get("name") + if dataset_name: + dataset = hdx.data.dataset.Dataset.read_from_hdx(dataset_name) + if dataset: + dataset_id = dataset["id"] + else: + raise HDXError("No dataset id or name in dataset!") if dataset_id: - existing_dataset_id = self.data.get("package_id") - if not existing_dataset_id or existing_dataset_id == dataset_id: - self.data["package_id"] = dataset["id"] - dataset_resources = dataset.get_resources() - matching_index = hdx.data.resource_matcher.ResourceMatcher.match_resource_list( - dataset_resources, self + package_id = 
self.data.get("package_id") + if package_id and package_id != dataset_id: + logger.warning( + f"Using dataset id {dataset_id} from dataset parameter which doesn't match {package_id} in resource!" ) - if matching_index: - matching_resource = dataset_resources[matching_index] - loadedid = matching_resource.get("id") - if loadedid: - self.data["id"] = loadedid - else: - loadedid = None + else: + dataset_id = self.data.get("package_id") + if dataset_id: + dataset = hdx.data.dataset.Dataset.read_from_hdx(dataset_id) + if not dataset: + dataset_id = None + if dataset_id: + self.data["package_id"] = dataset["id"] + dataset_resources = dataset.get_resources() + matching_index = ( + hdx.data.resource_matcher.ResourceMatcher.match_resource_list( + dataset_resources, self + ) + ) + if matching_index: + matching_resource = dataset_resources[matching_index] + loadedid = matching_resource.get("id") + if loadedid: + self.data["id"] = loadedid return loadedid def update_in_hdx(self, **kwargs: Any) -> int: diff --git a/tests/hdx/data/test_resource.py b/tests/hdx/data/test_resource.py index 4020c92..96b67b5 100755 --- a/tests/hdx/data/test_resource.py +++ b/tests/hdx/data/test_resource.py @@ -235,11 +235,20 @@ def mockdataset(url, datadict): 404, '{"success": false, "error": {"message": "TEST ERROR: Not show", "__type": "TEST ERROR: Not Show Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}', ) - result = json.dumps(dataset_resultdict) + if datadict["id"] in ( + "1234", + "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", + "test_dataset", + ): + result = json.dumps(dataset_resultdict) + return MockResponse( + 200, + '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}' + % result, + ) return MockResponse( - 200, - '{"success": true, "result": %s, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}' - % result, + 404, + '{"success": false, "error": {"message": "Not 
found", "__type": "Not Found Error"}, "help": "http://test-data.humdata.org/api/3/action/help_show?name=package_show"}', ) @@ -1070,15 +1079,50 @@ def test_get_api_url(self, configuration, read): del resource["id"] assert resource.get_api_url() is None - def test_get_resource_id(self, configuration, read): + def test_get_resource_id(self, configuration, post_dataset): resources = [ - {"id": "abcd", "name": "test_resource", "format": "CSV"}, - {"id": "efgh", "name": "test_resource2", "format": "CSV"}, - {"id": "ijkl", "name": "test_resource2", "format": "XLSX"}, + {"id": "abcd", "name": "Resource1", "format": "CSV"}, + {"id": "efgh", "name": "Resource2", "format": "CSV"}, + {"id": "ijkl", "name": "Resource3", "format": "XLSX"}, ] dataset = Dataset({"id": "1234", "name": "test_dataset", "format": "CSV"}) dataset.add_update_resources(resources) - resource = Resource({"name": "test_resource2", "format": "CSV"}) + # Uses resource["name"] and resource["format"] + resource = Resource({"name": "Resource2", "format": "CSV"}) + result = resource._get_resource_id(dataset=dataset) + assert result == "efgh" + + # Uses resource["id"] result = resource._get_resource_id(dataset=dataset) assert result == "efgh" + + # Uses dataset["name"] + del resource["id"] + del resource["package_id"] + del dataset["id"] + result = resource._get_resource_id(dataset=dataset) + assert result == "3d777226-96aa-4239-860a-703389d16d1f" + + # Uses resource["package_id"] + del resource["id"] + result = resource._get_resource_id() + assert result == "3d777226-96aa-4239-860a-703389d16d1f" + + # Uses resource["package_id"] that is not found + del resource["id"] + resource["package_id"] = "NOTFOUND" + result = resource._get_resource_id() + assert result is None + + # resource["package_id"] != dataset["id"] + resource["package_id"] = "NOTFOUND" + result = resource._get_resource_id(dataset=dataset) + assert result == "3d777226-96aa-4239-860a-703389d16d1f" + + # Invalid dataset + del resource["id"] + del resource["package_id"] 
+ del dataset["name"] + with pytest.raises(HDXError): + resource._get_resource_id(dataset=dataset)