Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ jobs:
id-token: write # IMPORTANT: mandatory for trusted publishing

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Get history and tags for versioning to work
run: |
git fetch --prune --unshallow
git fetch --depth=1 origin +refs/tags/*:refs/tags/*
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.x'
- name: Install dependencies
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/run-python-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ jobs:
pull-requests: write

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.x'
- name: Install dependencies
Expand Down
7 changes: 7 additions & 0 deletions documentation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,13 @@ The built-in configuration assumes data will be of form similar to below:

## Resource Specific Operations

When creating or updating a resource that doesn't have an id, you can supply the
parameter `dataset`. The resource will then be assigned to that dataset and compared
to the resources already in that dataset. If a match is found, the resource will be
given the id of the matching resource and that resource on HDX will be overwritten.

resource.create_in_hdx(dataset=DATASET)

You can download a resource using the **download** function eg.

url, path = resource.download("FOLDER_TO_DOWNLOAD_TO")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies = [
"defopt>=7.0.0",
"email_validator",
"hdx-python-country>=3.9.8",
"hdx-python-utilities>=3.9.5",
"hdx-python-utilities>=3.9.6",
"libhxl>=5.2.2",
"makefun",
"quantulum3",
Expand Down
28 changes: 14 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ babel==2.17.0
# via mkdocs-material
backrefs==6.1
# via mkdocs-material
cachetools==6.2.2
cachetools==6.2.4
# via google-auth
certifi==2025.11.12
# via requests
cfgv==3.4.0
cfgv==3.5.0
# via pre-commit
chardet==5.2.0
# via frictionless
Expand All @@ -32,7 +32,7 @@ click==8.3.1
# typer
colorama==0.4.6
# via mkdocs-material
coverage==7.12.0
coverage==7.13.0
# via pytest-cov
defopt==7.0.0
# via hdx-python-api (pyproject.toml)
Expand All @@ -50,13 +50,13 @@ email-validator==2.3.0
# via hdx-python-api (pyproject.toml)
et-xmlfile==2.0.0
# via openpyxl
filelock==3.20.0
filelock==3.20.1
# via virtualenv
frictionless==5.18.1
# via hdx-python-utilities
ghp-import==2.1.0
# via mkdocs
google-auth==2.43.0
google-auth==2.45.0
# via
# google-auth-oauthlib
# gspread
Expand All @@ -66,7 +66,7 @@ gspread==6.2.1
# via hdx-python-api (pyproject.toml)
hdx-python-country==3.9.8
# via hdx-python-api (pyproject.toml)
hdx-python-utilities==3.9.5
hdx-python-utilities==3.9.6
# via
# hdx-python-api (pyproject.toml)
# hdx-python-country
Expand Down Expand Up @@ -161,7 +161,7 @@ pathspec==0.12.1
# via mkdocs
petl==1.7.17
# via frictionless
platformdirs==4.5.0
platformdirs==4.5.1
# via
# mkdocs-get-deps
# virtualenv
Expand All @@ -175,15 +175,15 @@ ply==3.11
# libhxl
pockets==0.9.1
# via sphinxcontrib-napoleon
pre-commit==4.4.0
pre-commit==4.5.1
# via hdx-python-api (pyproject.toml)
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.2
# via google-auth
pydantic==2.12.4
pydantic==2.12.5
# via frictionless
pydantic-core==2.41.5
# via pydantic
Expand All @@ -192,16 +192,16 @@ pygments==2.19.2
# mkdocs-material
# pytest
# rich
pymdown-extensions==10.17.1
pymdown-extensions==10.19.1
# via mkdocs-material
pyphonetics==0.5.3
# via hdx-python-utilities
pytest==9.0.1
pytest==9.0.2
# via
# hdx-python-api (pyproject.toml)
# pytest-check
# pytest-cov
pytest-check==2.6.0
pytest-check==2.6.2
# via hdx-python-api (pyproject.toml)
pytest-cov==7.0.0
# via hdx-python-api (pyproject.toml)
Expand Down Expand Up @@ -253,7 +253,7 @@ rfc3986==2.0.0
# via frictionless
rich==14.2.0
# via typer
rpds-py==0.29.0
rpds-py==0.30.0
# via
# jsonschema
# referencing
Expand Down Expand Up @@ -307,7 +307,7 @@ unidecode==1.4.0
# via
# libhxl
# pyphonetics
urllib3==2.5.0
urllib3==2.6.2
# via
# libhxl
# requests
Expand Down
21 changes: 11 additions & 10 deletions src/hdx/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,15 @@ def add_update_resource(
self,
resource: Union["Resource", Dict, str],
ignore_datasetid: bool = False,
) -> None:
) -> "Resource":
"""Add new or update existing resource in dataset with new metadata

Args:
resource (Union[Resource,Dict,str]): Either resource id or resource metadata from a Resource object or a dictionary
ignore_datasetid (bool): Whether to ignore dataset id in the resource

Returns:
None
Resource: The resource that was added after matching with any existing resource
"""
resource = self._get_resource_from_obj(resource)
if "package_id" in resource:
Expand All @@ -298,14 +298,15 @@ def add_update_resource(
resource_index = ResourceMatcher.match_resource_list(self._resources, resource)
if resource_index is None:
self._resources.append(resource)
else:
updated_resource = merge_two_dictionaries(
self._resources[resource_index], resource
)
if resource.get_file_to_upload():
updated_resource.set_file_to_upload(resource.get_file_to_upload())
if resource.is_marked_data_updated():
updated_resource.mark_data_updated()
return resource
updated_resource = merge_two_dictionaries(
self._resources[resource_index], resource
)
if resource.get_file_to_upload():
updated_resource.set_file_to_upload(resource.get_file_to_upload())
if resource.is_marked_data_updated():
updated_resource.mark_data_updated()
return updated_resource

def add_update_resources(
self,
Expand Down
47 changes: 43 additions & 4 deletions src/hdx/data/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Any, Dict, List, Optional, Tuple, Union

import hdx.data.dataset
import hdx.data.resource_matcher
from hdx.api.configuration import Configuration
from hdx.api.utilities.date_helper import DateHelper
from hdx.api.utilities.size_hash import get_size_and_hash
Expand Down Expand Up @@ -460,6 +461,39 @@ def _resource_merge_hdx_update(
self._merge_hdx_update("resource", "id", files, True, **kwargs)
return status

def _get_resource_id(self, **kwargs: Any) -> Optional[str]:
    """Helper function to get resource id if available from given resource or by
    comparing to a given dataset's resources.

    If the resource already has an id, that id is returned unchanged. Otherwise,
    if a dataset with an id is supplied and the resource either has no package_id
    or has the same package_id as the dataset, the resource's package_id is set to
    the dataset's id and the resource is matched against the dataset's resources.
    When a match with an id is found, that id is stored in the resource and
    returned.

    Args:
        **kwargs: See below
        dataset (Dataset): Existing dataset if available to obtain resource id

    Returns:
        Optional[str]: Resource id or None
    """
    loadedid = self.data.get("id")
    if loadedid is not None:
        return loadedid

    dataset = kwargs.get("dataset")
    if not dataset:
        return None
    dataset_id = dataset.get("id")
    if not dataset_id:
        return None

    # A resource that already belongs to a different dataset is left untouched.
    existing_dataset_id = self.data.get("package_id")
    if existing_dataset_id and existing_dataset_id != dataset_id:
        return None

    self.data["package_id"] = dataset_id
    dataset_resources = dataset.get_resources()
    matching_index = (
        hdx.data.resource_matcher.ResourceMatcher.match_resource_list(
            dataset_resources, self
        )
    )
    # Compare against None rather than truthiness: index 0 is a valid match
    # (the dataset's first resource) but is falsy.
    if matching_index is None:
        return None

    loadedid = dataset_resources[matching_index].get("id")
    if not loadedid:
        return None
    self.data["id"] = loadedid
    return loadedid

def update_in_hdx(self, **kwargs: Any) -> int:
"""Check if resource exists in HDX and if so, update it. To indicate
that the data in an external resource (given by a URL) has been
Expand All @@ -482,11 +516,13 @@ def update_in_hdx(self, **kwargs: Any) -> int:
data_updated (bool): If True, set last_modified to now. Defaults to False.
date_data_updated (datetime): Date to use for last_modified. Default to None.
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
dataset (Dataset): Existing dataset if available to obtain resource id

Returns:
int: Status code
"""
self.check_both_url_filetoupload()
_ = self._get_resource_id(**kwargs)
self._check_load_existing_object("resource", "id")
return self._resource_merge_hdx_update(**kwargs)

Expand All @@ -513,15 +549,18 @@ def create_in_hdx(self, **kwargs: Any) -> int:
data_updated (bool): If True, set last_modified to now. Defaults to False.
date_data_updated (datetime): Date to use for last_modified. Default to None.
force_update (bool): Force file to be updated even if it hasn't changed. Defaults to False.
dataset (Dataset): Existing dataset if available to obtain resource id

Returns:
int: Status code
"""
self.check_both_url_filetoupload()
id = self.data.get("id")
if id and self._load_from_hdx("resource", id):
logger.warning(f"{'resource'} exists. Updating {id}")
return self._resource_merge_hdx_update(**kwargs)
loadedid = self._get_resource_id(**kwargs)
if loadedid:
if self._load_from_hdx("resource", loadedid):
logger.warning(f"{'resource'} exists. Updating {loadedid}")
return self._resource_merge_hdx_update(**kwargs)
logger.warning(f"Failed to load resource with id {loadedid}")

self.set_types()
self.correct_format(self.data)
Expand Down
14 changes: 14 additions & 0 deletions tests/hdx/data/test_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .. import MockResponse, dataset_resultdict, resource_data
from .test_resource_view import resource_view_list, resource_view_mocklist
from hdx.api.configuration import Configuration
from hdx.data.dataset import Dataset
from hdx.data.hdxobject import HDXError
from hdx.data.resource import Resource
from hdx.utilities.dateparse import parse_date
Expand Down Expand Up @@ -1068,3 +1069,16 @@ def test_get_api_url(self, configuration, read):
)
del resource["id"]
assert resource.get_api_url() is None

def test_get_resource_id(self, configuration, read):
    """Check that a resource without an id obtains one from a supplied dataset.

    The dataset holds three resources; the expected id "efgh" belongs to the
    one whose name and format both equal those of the candidate resource.
    """
    existing_dataset = Dataset(
        {"id": "1234", "name": "test_dataset", "format": "CSV"}
    )
    existing_dataset.add_update_resources(
        [
            {"id": "abcd", "name": "test_resource", "format": "CSV"},
            {"id": "efgh", "name": "test_resource2", "format": "CSV"},
            {"id": "ijkl", "name": "test_resource2", "format": "XLSX"},
        ]
    )

    candidate = Resource({"name": "test_resource2", "format": "CSV"})
    assert candidate._get_resource_id(dataset=existing_dataset) == "efgh"
Loading