diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml index 6e72641382..830b147805 100644 --- a/.github/conda/meta.yaml +++ b/.github/conda/meta.yaml @@ -16,7 +16,7 @@ requirements: - pip - fsspec - filelock - - requests + - httpx - tqdm - typing-extensions - packaging @@ -26,7 +26,7 @@ requirements: - python - pip - filelock - - requests + - httpx - tqdm - typing-extensions - packaging diff --git a/.github/workflows/check-installers.yml b/.github/workflows/check-installers.yml new file mode 100644 index 0000000000..9a3757e1bc --- /dev/null +++ b/.github/workflows/check-installers.yml @@ -0,0 +1,88 @@ +name: Check CLI installers + +on: + push: + branches: + - main + paths: + - "utils/installers/**" + - ".github/workflows/check-installers.yml" + pull_request: + paths: + - "utils/installers/**" + - ".github/workflows/check-installers.yml" + workflow_dispatch: {} + +permissions: + contents: read + +jobs: + linux-installer: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run installer + shell: bash + run: | + set -euo pipefail + + HF_TEST_ROOT=$(mktemp -d) + INSTALL_DIR="$HF_TEST_ROOT/install" + BIN_DIR="$HF_TEST_ROOT/bin" + + HF_HOME="$INSTALL_DIR" HF_CLI_BIN_DIR="$BIN_DIR" utils/installers/install.sh --no-modify-path + + export PATH="$BIN_DIR:$PATH" + + HF_VERSION_PATH="$HF_TEST_ROOT/hf-version.txt" + hf version | tee "$HF_VERSION_PATH" + if ! grep -Eq 'huggingface_hub version: [0-9]+(\.[0-9]+){1,2}' "$HF_VERSION_PATH"; then + echo "hf version output missing huggingface_hub version" >&2 + cat "$HF_VERSION_PATH" >&2 + exit 1 + fi + + NO_COLOR=1 hf --help + + rm -rf "$HF_TEST_ROOT" + + windows-installer: + runs-on: windows-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run installer + shell: pwsh + run: | + $hfTestRoot = Join-Path $env:TEMP ([System.Guid]::NewGuid().ToString()) + $installDir = Join-Path $hfTestRoot 'install' + $binDir = Join-Path $hfTestRoot 'bin' + New-Item -ItemType Directory -Path $installDir -Force | Out-Null + New-Item -ItemType Directory -Path $binDir -Force | Out-Null + + $env:HF_HOME = $installDir + $env:HF_CLI_BIN_DIR = $binDir + & "$PWD/utils/installers/install.ps1" -NoModifyPath + + $env:PATH = "$binDir;$env:PATH" + + $hfVersionPath = Join-Path $hfTestRoot 'hf-version.txt' + & hf.exe version | Tee-Object -FilePath $hfVersionPath + if ($LASTEXITCODE -ne 0) { + throw 'hf version failed' + } + if (-not (Select-String -Path $hfVersionPath -Pattern 'huggingface_hub version: [0-9]+(\.[0-9]+){1,2}')) { + throw 'hf version output missing huggingface_hub version' + } + + $env:NO_COLOR = '1' + & hf.exe --help + if ($LASTEXITCODE -ne 0) { + throw 'hf --help failed' + } + Remove-Item Env:NO_COLOR + + Remove-Item -Path $hfTestRoot -Recurse -Force diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml deleted file mode 100644 index df663ce975..0000000000 --- a/.github/workflows/contrib-tests.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Contrib tests - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * 6' # Run once a week, Saturday midnight - push: - branches: - - ci_contrib_* - pull_request: - types: [assigned, opened, synchronize, reopened] - paths: - - contrib/** - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - contrib: [ - "sentence_transformers", - "spacy", - "timm", - ] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - 
- # Install pip - - name: Install pip - run: pip install --upgrade pip - - # Install downstream library and its specific dependencies - - name: Install ${{ matrix.contrib }} - run: pip install -r contrib/${{ matrix.contrib }}/requirements.txt - - # Install huggingface_hub from source code + testing extras - - name: Install `huggingface_hub` - run: | - pip uninstall -y huggingface_hub - pip install .[testing] - - # Run tests - - name: Run tests - run: pytest contrib/${{ matrix.contrib }} diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml index d9594a5435..a29c577e75 100644 --- a/.github/workflows/python-quality.yml +++ b/.github/workflows/python-quality.yml @@ -35,9 +35,8 @@ jobs: - name: Install dependencies run: uv pip install "huggingface_hub[dev] @ ." - - run: .venv/bin/ruff check tests src contrib # linter - - run: .venv/bin/ruff format --check tests src contrib # formatter - - run: .venv/bin/python utils/check_contrib_list.py + - run: .venv/bin/ruff check tests src # linter + - run: .venv/bin/ruff format --check tests src # formatter - run: .venv/bin/python utils/check_inference_input_params.py - run: .venv/bin/python utils/check_static_imports.py - run: .venv/bin/python utils/check_all_variable.py @@ -50,4 +49,4 @@ jobs: - run: .venv/bin/mypy src/huggingface_hub/__init__.py --follow-imports=silent --show-traceback # Run mypy on full package - - run: .venv/bin/mypy src \ No newline at end of file + - run: .venv/bin/mypy src diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 11bfcc806f..f30945a461 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -21,27 +21,17 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.13"] - test_name: - [ - "Repository only", - "Everything else", - "Inference only", - "Xet only" - ] + python-version: ["3.9", "3.13"] + test_name: ["Everything else", "Inference only", "Xet only"] include: - - python-version: "3.13" # LFS not ran on 3.8 + - python-version: "3.13" # LFS not ran on 3.9 test_name: "lfs" - - python-version: "3.8" + - python-version: "3.9" test_name: "fastai" - python-version: "3.10" # fastai not supported on 3.12 and 3.11 -> test it on 3.10 test_name: "fastai" - - python-version: "3.8" - test_name: "tensorflow" - - python-version: "3.10" # tensorflow not supported on 3.12 -> test it on 3.10 - test_name: "tensorflow" - - python-version: "3.8" # test torch~=1.11 on python 3.8 only. - test_name: "Python 3.8, torch_1.11" + - python-version: "3.9" # test torch~=1.11 on python 3.9 only. + test_name: "Python 3.9, torch_1.11" - python-version: "3.12" # test torch latest on python 3.12 only. test_name: "torch_latest" steps: @@ -65,7 +55,7 @@ jobs: case "${{ matrix.test_name }}" in - "Repository only" | "Everything else" | "Inference only") + "Everything else" | "Inference only") sudo apt update sudo apt install -y libsndfile1-dev ;; @@ -84,17 +74,11 @@ jobs: uv pip install --upgrade torch ;; - "Python 3.8, torch_1.11") + "Python 3.9, torch_1.11") uv pip install "huggingface_hub[torch] @ ." uv pip install torch~=1.11 ;; - tensorflow) - sudo apt update - sudo apt install -y graphviz - uv pip install "huggingface_hub[tensorflow-testing] @ ." 
- ;; - esac # If not "Xet only", we want to test upload/download with regular LFS workflow @@ -112,13 +96,6 @@ jobs: case "${{ matrix.test_name }}" in - "Repository only") - # Run repo tests concurrently - PYTEST="$PYTEST ../tests -k 'TestRepository' -n 4" - echo $PYTEST - eval $PYTEST - ;; - "Inference only") # Run inference tests concurrently PYTEST="$PYTEST ../tests -k 'test_inference' -n 4" @@ -140,14 +117,7 @@ jobs: eval "$PYTEST ../tests/test_fastai*" ;; - tensorflow) - # Cannot be on same line since '_tf*' checks if tensorflow is NOT imported by default - eval "$PYTEST ../tests/test_tf*" - eval "$PYTEST ../tests/test_keras*" - eval "$PYTEST ../tests/test_serialization.py" - ;; - - "Python 3.8, torch_1.11" | torch_latest) + "Python 3.9, torch_1.11" | torch_latest) eval "$PYTEST ../tests/test_hub_mixin*" eval "$PYTEST ../tests/test_serialization.py" ;; @@ -178,7 +148,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.9", "3.11"] test_name: ["Everything else", "Xet only"] steps: diff --git a/.github/workflows/release-conda.yml b/.github/workflows/release-conda.yml index 135d988809..b6ead02950 100644 --- a/.github/workflows/release-conda.yml +++ b/.github/workflows/release-conda.yml @@ -26,7 +26,7 @@ jobs: with: auto-update-conda: true auto-activate-base: false - python-version: 3.8 + python-version: 3.9 activate-environment: "build-hub" - name: Setup conda env diff --git a/Makefile b/Makefile index faa2a63500..35128faf2b 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,13 @@ -.PHONY: contrib quality style test +.PHONY: quality style test -check_dirs := contrib src tests utils setup.py +check_dirs := src tests utils setup.py quality: ruff check $(check_dirs) # linter ruff format --check $(check_dirs) # formatter python utils/check_inference_input_params.py - python utils/check_contrib_list.py python utils/check_static_imports.py python utils/check_all_variable.py python utils/generate_async_inference_client.py @@ -18,7 +17,6 @@ quality: style: ruff format $(check_dirs) # formatter ruff check --fix $(check_dirs) # linter - python utils/check_contrib_list.py --update python utils/check_static_imports.py --update python utils/check_all_variable.py --update python utils/generate_async_inference_client.py --update @@ -38,42 +36,3 @@ repocard: test: pytest ./tests/ - -# Taken from https://stackoverflow.com/a/12110773 -# Commands: -# make contrib_setup_timm : setup tests for timm -# make contrib_test_timm : run tests for timm -# make contrib_timm : setup and run tests for timm -# make contrib_clear_timm : delete timm virtual env -# -# make contrib_setup : setup ALL tests -# make contrib_test : run ALL tests -# make contrib : setup and run ALL tests -# make contrib_clear : delete all virtual envs -# Use -j4 flag to run jobs in parallel. 
-CONTRIB_LIBS := sentence_transformers spacy timm -CONTRIB_JOBS := $(addprefix contrib_,${CONTRIB_LIBS}) -CONTRIB_CLEAR_JOBS := $(addprefix contrib_clear_,${CONTRIB_LIBS}) -CONTRIB_SETUP_JOBS := $(addprefix contrib_setup_,${CONTRIB_LIBS}) -CONTRIB_TEST_JOBS := $(addprefix contrib_test_,${CONTRIB_LIBS}) - -contrib_clear_%: - rm -rf contrib/$*/.venv - -contrib_setup_%: - python3 -m venv contrib/$*/.venv - ./contrib/$*/.venv/bin/pip install -r contrib/$*/requirements.txt - ./contrib/$*/.venv/bin/pip uninstall -y huggingface_hub - ./contrib/$*/.venv/bin/pip install -e .[testing] - -contrib_test_%: - ./contrib/$*/.venv/bin/python -m pytest contrib/$* - -contrib_%: - make contrib_setup_$* - make contrib_test_$* - -contrib: ${CONTRIB_JOBS}; -contrib_clear: ${CONTRIB_CLEAR_JOBS}; echo "Successful contrib tests." -contrib_setup: ${CONTRIB_SETUP_JOBS}; echo "Successful contrib setup." -contrib_test: ${CONTRIB_TEST_JOBS}; echo "Successful contrib tests." diff --git a/contrib/README.md b/contrib/README.md deleted file mode 100644 index 05db2d705b..0000000000 --- a/contrib/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Contrib test suite - -The contrib folder contains simple end-to-end scripts to test integration of `huggingface_hub` in downstream libraries. The main goal is to proactively notice breaking changes and deprecation warnings. - -## Add tests for a new library - -To add another contrib lib, one must: -1. Create a subfolder with the lib name. Example: `./contrib/transformers` -2. Create a `requirements.txt` file specific to this lib. Example `./contrib/transformers/requirements.txt` -3. Implements tests for this lib. Example: `./contrib/transformers/test_push_to_hub.py` -4. Run `make style`. This will edit both `makefile` and `.github/workflows/contrib-tests.yml` to add the lib to list of libs to test. Make sure changes are accurate before committing. - -## Run contrib tests on CI - -Contrib tests can be [manually triggered in GitHub](https://github.com/huggingface/huggingface_hub/actions) with the `Contrib tests` workflow. - -Tests are not run in the default test suite (for each PR) as this would slow down development process. The goal is to notice breaking changes, not to avoid them. In particular, it is interesting to trigger it before a release to make sure it will not cause too much friction. - -## Run contrib tests locally - -Tests must be ran individually for each dependent library. Here is an example to run -`timm` tests. Tests are separated to avoid conflicts between version dependencies. - -### Run all contrib tests - -Before running tests, a virtual env must be setup for each contrib library. To do so, run: - -```sh -# Run setup in parallel to save time -make contrib_setup -j4 -``` - -Then tests can be run - -```sh -# Optional: -j4 to run in parallel. Output will be messy in that case. -make contrib_test -j4 -``` - -Optionally, it is possible to setup and run all tests in a single command. However this -take more time as you don't need to setup the venv each time you run tests. - -```sh -make contrib -j4 -``` - -Finally, it is possible to delete all virtual envs to get a fresh start for contrib tests. -After running this command, `contrib_setup` will have to re-download/re-install all dependencies. 
- -``` -make contrib_clear -``` - -### Run contrib tests for a single lib - -Instead of running tests for all contrib libraries, you can run a specific lib: - -```sh -# Setup timm tests -make contrib_setup_timm - -# Run timm tests -make contrib_test_timm - -# (or) Setup and run timm tests at once -make contrib_timm - -# Delete timm virtualenv if corrupted -make contrib_clear_timm -``` diff --git a/contrib/__init__.py b/contrib/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/conftest.py b/contrib/conftest.py deleted file mode 100644 index 285139fd69..0000000000 --- a/contrib/conftest.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import time -import uuid -from typing import Generator - -import pytest - -from huggingface_hub import delete_repo - - -@pytest.fixture(scope="session") -def token() -> str: - # Not critical, only usable on the sandboxed CI instance. - return "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" - - -@pytest.fixture(scope="session") -def user() -> str: - return "__DUMMY_TRANSFORMERS_USER__" - - -@pytest.fixture(autouse=True, scope="session") -def login_as_dummy_user(token: str) -> Generator: - """Log in with dummy user token.""" - # Cannot use `monkeypatch` fixture since we want it to be "session-scoped" - old_token = os.environ["HF_TOKEN"] - os.environ["HF_TOKEN"] = token - yield - os.environ["HF_TOKEN"] = old_token - - -@pytest.fixture -def repo_name(request) -> None: - """ - Return a readable pseudo-unique repository name for tests. - - Example: "repo-2fe93f-16599646671840" - """ - prefix = request.module.__name__ # example: `test_timm` - id = uuid.uuid4().hex[:6] - ts = int(time.time() * 10e3) - return f"repo-{prefix}-{id}-{ts}" - - -@pytest.fixture -def cleanup_repo(user: str, repo_name: str) -> None: - """Delete the repo at the end of the tests. - - TODO: Adapt to handle `repo_type` as well - """ - yield # run test - delete_repo(repo_id=f"{user}/{repo_name}") diff --git a/contrib/sentence_transformers/__init__.py b/contrib/sentence_transformers/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/sentence_transformers/requirements.txt b/contrib/sentence_transformers/requirements.txt deleted file mode 100644 index c8c5244b95..0000000000 --- a/contrib/sentence_transformers/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/UKPLab/sentence-transformers.git#egg=sentence-transformers diff --git a/contrib/sentence_transformers/test_sentence_transformers.py b/contrib/sentence_transformers/test_sentence_transformers.py deleted file mode 100644 index d1ceeb43dc..0000000000 --- a/contrib/sentence_transformers/test_sentence_transformers.py +++ /dev/null @@ -1,37 +0,0 @@ -import time - -import pytest -from sentence_transformers import SentenceTransformer, util - -from huggingface_hub import model_info - -from ..utils import production_endpoint - - -@pytest.fixture(scope="module") -def multi_qa_model() -> SentenceTransformer: - with production_endpoint(): - return SentenceTransformer("multi-qa-MiniLM-L6-cos-v1") - - -def test_from_pretrained(multi_qa_model: SentenceTransformer) -> None: - # Example taken from https://www.sbert.net/docs/hugging_face.html#using-hugging-face-models. 
- query_embedding = multi_qa_model.encode("How big is London") - passage_embedding = multi_qa_model.encode( - [ - "London has 9,787,426 inhabitants at the 2011 census", - "London is known for its financial district", - ] - ) - print("Similarity:", util.dot_score(query_embedding, passage_embedding)) - - -def test_push_to_hub(multi_qa_model: SentenceTransformer, repo_name: str, user: str, cleanup_repo: None) -> None: - multi_qa_model.save_to_hub(repo_name, organization=user) - - # Sleep to ensure that model_info isn't called too soon - time.sleep(1) - - # Check model has been pushed properly - model_id = f"{user}/{repo_name}" - assert model_info(model_id).library_name == "sentence-transformers" diff --git a/contrib/spacy/__init__.py b/contrib/spacy/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/spacy/requirements.txt b/contrib/spacy/requirements.txt deleted file mode 100644 index 6255342454..0000000000 --- a/contrib/spacy/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/explosion/spacy-huggingface-hub.git#egg=spacy-huggingface-hub diff --git a/contrib/spacy/test_spacy.py b/contrib/spacy/test_spacy.py deleted file mode 100644 index 00d4c9b671..0000000000 --- a/contrib/spacy/test_spacy.py +++ /dev/null @@ -1,48 +0,0 @@ -import time - -from spacy_huggingface_hub import push - -from huggingface_hub import delete_repo, hf_hub_download, model_info -from huggingface_hub.errors import HfHubHTTPError - -from ..utils import production_endpoint - - -def test_push_to_hub(user: str) -> None: - """Test equivalent of `python -m spacy huggingface-hub push`. - - (0. Delete existing repo on the Hub (if any)) - 1. Download an example file from production - 2. Push the model! - 3. Check model pushed the Hub + as spacy library - (4. 
Cleanup) - """ - model_id = f"{user}/en_core_web_sm" - _delete_repo(model_id) - - # Download example file from HF Hub (see https://huggingface.co/spacy/en_core_web_sm) - with production_endpoint(): - whl_path = hf_hub_download( - repo_id="spacy/en_core_web_sm", - filename="en_core_web_sm-any-py3-none-any.whl", - ) - - # Push spacy model to Hub - push(whl_path) - - # Sleep to ensure that model_info isn't called too soon - time.sleep(1) - - # Check model has been pushed properly - model_id = f"{user}/en_core_web_sm" - assert model_info(model_id).library_name == "spacy" - - # Cleanup - _delete_repo(model_id) - - -def _delete_repo(model_id: str) -> None: - try: - delete_repo(model_id) - except HfHubHTTPError: - pass diff --git a/contrib/timm/__init__.py b/contrib/timm/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/timm/requirements.txt b/contrib/timm/requirements.txt deleted file mode 100644 index 33944e7373..0000000000 --- a/contrib/timm/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Timm -git+https://github.com/rwightman/pytorch-image-models.git#egg=timm -safetensors diff --git a/contrib/timm/test_timm.py b/contrib/timm/test_timm.py deleted file mode 100644 index f57788f8c4..0000000000 --- a/contrib/timm/test_timm.py +++ /dev/null @@ -1,20 +0,0 @@ -import timm - -from ..utils import production_endpoint - - -MODEL_ID = "timm/mobilenetv3_large_100.ra_in1k" - - -@production_endpoint() -def test_load_from_hub() -> None: - # Test load only config - _ = timm.models.load_model_config_from_hf(MODEL_ID) - - # Load entire model from Hub - _ = timm.create_model("hf_hub:" + MODEL_ID, pretrained=True) - - -def test_push_to_hub(repo_name: str, cleanup_repo: None) -> None: - model = timm.create_model("mobilenetv3_rw") - timm.models.push_to_hf_hub(model, repo_name) diff --git a/contrib/utils.py b/contrib/utils.py deleted file mode 100644 index e1681cd561..0000000000 --- a/contrib/utils.py +++ /dev/null @@ -1,56 +0,0 @@ -import contextlib -from typing import Generator -from unittest.mock import patch - - -@contextlib.contextmanager -def production_endpoint() -> Generator: - """Patch huggingface_hub to connect to production server in a context manager. - - Ugly way to patch all constants at once. - TODO: refactor when https://github.com/huggingface/huggingface_hub/issues/1172 is fixed. 
- - Example: - ```py - def test_push_to_hub(): - # Pull from production Hub - with production_endpoint(): - model = ...from_pretrained("modelname") - - # Push to staging Hub - model.push_to_hub() - ``` - """ - PROD_ENDPOINT = "https://huggingface.co" - ENDPOINT_TARGETS = [ - "huggingface_hub.constants", - "huggingface_hub._commit_api", - "huggingface_hub.hf_api", - "huggingface_hub.lfs", - "huggingface_hub.commands.user", - "huggingface_hub.utils._git_credential", - ] - - PROD_URL_TEMPLATE = PROD_ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}" - URL_TEMPLATE_TARGETS = [ - "huggingface_hub.constants", - "huggingface_hub.file_download", - ] - - from huggingface_hub.hf_api import api - - patchers = ( - [patch(target + ".ENDPOINT", PROD_ENDPOINT) for target in ENDPOINT_TARGETS] - + [patch(target + ".HUGGINGFACE_CO_URL_TEMPLATE", PROD_URL_TEMPLATE) for target in URL_TEMPLATE_TARGETS] - + [patch.object(api, "endpoint", PROD_URL_TEMPLATE)] - ) - - # Start all patches - for patcher in patchers: - patcher.start() - - yield - - # Stop all patches - for patcher in patchers: - patcher.stop() diff --git a/docs/source/cn/_toctree.yml b/docs/source/cn/_toctree.yml index b4949efa35..db6d3244a9 100644 --- a/docs/source/cn/_toctree.yml +++ b/docs/source/cn/_toctree.yml @@ -20,7 +20,4 @@ title: 概览 - local: guides/hf_file_system title: Hugging Face 文件系统 -- title: "concepts" - sections: - - local: concepts/git_vs_http - title: Git vs HTTP 范式 + diff --git a/docs/source/cn/concepts/git_vs_http.md b/docs/source/cn/concepts/git_vs_http.md deleted file mode 100644 index b582b5f991..0000000000 --- a/docs/source/cn/concepts/git_vs_http.md +++ /dev/null @@ -1,40 +0,0 @@ - - -# Git 与 HTTP 范式 - -`huggingface_hub`库是用于与Hugging Face Hub进行交互的库,Hugging Face Hub是一组基于Git的存储库(模型、数据集或Spaces)。使用 `huggingface_hub`有两种主要方式来访问Hub。 - -第一种方法,即所谓的“基于git”的方法,由[`Repository`]类驱动。这种方法使用了一个包装器,它在 `git`命令的基础上增加了专门与Hub交互的额外函数。第二种选择,称为“基于HTTP”的方法,涉及使用[`HfApi`]客户端进行HTTP请求。让我们来看一看每种方法的优缺点。 - -## 存储库:基于历史的 Git 方法 - -最初,`huggingface_hub`主要围绕 [`Repository`] 类构建。它为常见的 `git` 命令(如 `"git add"`、`"git commit"`、`"git push"`、`"git tag"`、`"git checkout"` 等)提供了 Python 包装器 - -该库还可以帮助设置凭据和跟踪大型文件,这些文件通常在机器学习存储库中使用。此外,该库允许您在后台执行其方法,使其在训练期间上传数据很有用。 - -使用 [`Repository`] 的最大优点是它允许你在本地机器上维护整个存储库的本地副本。这也可能是一个缺点,因为它需要你不断更新和维护这个本地副本。这类似于传统软件开发中,每个开发人员都维护自己的本地副本,并在开发功能时推送更改。但是,在机器学习的上下文中,这可能并不总是必要的,因为用户可能只需要下载推理所需的权重,或将权重从一种格式转换为另一种格式,而无需克隆整个存储库。 - -## HfApi: 一个功能强大且方便的HTTP客户端 - -`HfApi` 被开发为本地 git 存储库的替代方案,因为本地 git 存储库在处理大型模型或数据集时可能会很麻烦。`HfApi` 提供与基于 git 的方法相同的功能,例如下载和推送文件以及创建分支和标签,但无需本地文件夹来保持同步。 - -`HfApi`除了提供 `git` 已经提供的功能外,还提供其他功能,例如: - -* 管理存储库 -* 使用缓存下载文件以进行有效的重复使用 -* 在 Hub 中搜索存储库和元数据 -* 访问社区功能,如讨论、PR和评论 -* 配置Spaces - -## 我应该使用什么?以及何时使用? 
- -总的来说,在大多数情况下,`HTTP 方法`是使用 huggingface_hub 的推荐方法。但是,在以下几种情况下,维护本地 git 克隆(使用 `Repository`)可能更有益: - -如果您在本地机器上训练模型,使用传统的 git 工作流程并定期推送更新可能更有效。`Repository` 被优化为此类情况,因为它能够在后台运行。 -如果您需要手动编辑大型文件,`git `是最佳选择,因为它只会将文件的差异发送到服务器。使用 `HfAPI` 客户端,每次编辑都会上传整个文件。请记住,大多数大型文件是二进制文件,因此无法从 git 差异中受益。 - -并非所有 git 命令都通过 [`HfApi`] 提供。有些可能永远不会被实现,但我们一直在努力改进并缩小差距。如果您没有看到您的用例被覆盖。 - -请在[Github](https://github.com/huggingface/huggingface_hub)打开一个 issue!我们欢迎反馈,以帮助我们与我们的用户一起构建 🤗 生态系统。 diff --git a/docs/source/cn/guides/repository.md b/docs/source/cn/guides/repository.md index ac64acd90b..fd827fa71a 100644 --- a/docs/source/cn/guides/repository.md +++ b/docs/source/cn/guides/repository.md @@ -153,91 +153,3 @@ GitRefs( >>> from huggingface_hub import move_repo >>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") ``` - -## 管理存储库的本地副本 - -上述所有操作都可以通过HTTP请求完成。然而,在某些情况下,您可能希望在本地拥有存储库的副本,并使用您熟悉的Git命令与之交互。 - -[`Repository`] 类允许您使用类似于Git命令的函数与Hub上的文件和存储库进行交互。它是对Git和Git-LFS方法的包装,以使用您已经了解和喜爱的Git命令。在开始之前,请确保已安装Git-LFS(请参阅[此处](https://git-lfs.github.com/)获取安装说明)。 - -### 使用本地存储库 - -使用本地存储库路径实例化一个 [`Repository`] 对象: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### 克隆 - -`clone_from`参数将一个存储库从Hugging Face存储库ID克隆到由 `local_dir`参数指定的本地目录: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` -`clone_from`还可以使用URL克隆存储库: - -请运行以下代码: - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -你可以将`clone_from`参数与[`create_repo`]结合使用,以创建并克隆一个存储库: - -请运行以下代码: - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -当你克隆一个存储库时,通过在克隆时指定`git_user`和`git_email`参数,你还可以为克隆的存储库配置Git用户名和电子邮件。当用户提交到该存储库时,Git将知道提交的作者是谁。 - -请运行以下代码: - -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -### 分支 - -分支对于协作和实验而不影响当前文件和代码非常重要。使用[`~Repository.git_checkout`]来在不同的分支之间切换。例如,如果你想从 `branch1`切换到 `branch2`: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### 拉取 - -[`~Repository.git_pull`] 允许你使用远程存储库的更改更新当前本地分支: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -如果你希望本地的提交发生在你的分支被远程的新提交更新之后,请设置`rebase=True`: - -```py ->>> repo.git_pull(rebase=True) -``` diff --git a/docs/source/cn/installation.md b/docs/source/cn/installation.md index c800b4b173..516d8b9f70 100644 --- a/docs/source/cn/installation.md +++ b/docs/source/cn/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 
 在开始之前,您需要通过安装适当的软件包来设置您的环境
 
-huggingface_hub 在 Python 3.8 或更高版本上进行了测试,可以保证在这些版本上正常运行。如果您使用的是 Python 3.7 或更低版本,可能会出现兼容性问题
+huggingface_hub 在 Python 3.9 或更高版本上进行了测试,可以保证在这些版本上正常运行。如果您使用的是 Python 3.8 或更低版本,可能会出现兼容性问题
 
 ## 使用 pip 安装
 
@@ -48,11 +48,7 @@ pip install --upgrade huggingface_hub
 您可以通过`pip`安装可选依赖项,请运行以下代码:
 
 ```bash
-# 安装 TensorFlow 特定功能的依赖项
-# /!\ 注意:这不等同于 `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
-# 安装 TensorFlow 特定功能和 CLI 特定功能的依赖项
+# 安装 Torch 特定功能和 CLI 特定功能的依赖项
 pip install 'huggingface_hub[cli,torch]'
 ```
 
@@ -60,7 +56,7 @@ pip install 'huggingface_hub[cli,torch]'
 
 - `cli`:为 `huggingface_hub` 提供更方便的命令行界面
 
-- `fastai`,` torch`, `tensorflow`: 运行框架特定功能所需的依赖项
+- `fastai`, `torch`: 运行框架特定功能所需的依赖项
 
 - `dev`:用于为库做贡献的依赖项。包括 `testing`(用于运行测试)、`typing`(用于运行类型检查器)和 `quality`(用于运行 linter)
diff --git a/docs/source/de/_toctree.yml b/docs/source/de/_toctree.yml
index 48807ba0d8..2b994c7cc6 100644
--- a/docs/source/de/_toctree.yml
+++ b/docs/source/de/_toctree.yml
@@ -34,7 +34,3 @@
   title: Integrieren einer Bibliothek
   - local: guides/webhooks_server
     title: Webhooks server
-- title: "Konzeptionelle Anleitungen"
-  sections:
-    - local: concepts/git_vs_http
-      title: Git vs. HTTP-Paradigma
diff --git a/docs/source/de/concepts/git_vs_http.md b/docs/source/de/concepts/git_vs_http.md
deleted file mode 100644
index 978123762a..0000000000
--- a/docs/source/de/concepts/git_vs_http.md
+++ /dev/null
@@ -1,69 +0,0 @@
-
-
-# Git vs. HTTP-Paradigma
-
-Die `huggingface_hub`-Bibliothek ist eine Bibliothek zur Interaktion mit dem Hugging Face
-Hub, einer Sammlung von auf Git basierenden Repositories (Modelle, Datensätze oder
-Spaces). Es gibt zwei Hauptmethoden, um auf den Hub mit `huggingface_hub` zuzugreifen.
-
-Der erste Ansatz, der sogenannte "Git-basierte" Ansatz, wird von der [`Repository`] Klasse
-geleitet. Diese Methode verwendet einen Wrapper um den `git`-Befehl mit zusätzlichen
-Funktionen, die speziell für die Interaktion mit dem Hub entwickelt wurden. Die zweite
-Option, die als "HTTP-basierter" Ansatz bezeichnet wird, umfasst das Senden von
-HTTP-Anfragen mit dem [`HfApi`] Client. Schauen wir uns die Vor- und Nachteile jeder
-Methode an.
-
-## Repository: Der historische git-basierte Ansatz
-
-Ursprünglich wurde `huggingface_hub` größtenteils um die [`Repository`] Klasse herum
-entwickelt. Sie bietet Python-Wrapper für gängige git-Befehle wie `"git add"`, `"git commit"`,
-`"git push"`, `"git tag"`, `"git checkout"` usw.
-
-Die Bibliothek hilft auch beim Festlegen von Zugangsdaten und beim Tracking von großen
-Dateien, die in Machine-Learning-Repositories häufig verwendet werden. Darüber hinaus
-ermöglicht die Bibliothek das Ausführen ihrer Methoden im Hintergrund, was nützlich ist,
-um Daten während des Trainings hochzuladen.
-
-Der Hauptvorteil bei der Verwendung einer [`Repository`] besteht darin, dass Sie eine
-lokale Kopie des gesamten Repositorys auf Ihrem Computer pflegen können. Dies kann jedoch
-auch ein Nachteil sein, da es erfordert, diese lokale Kopie ständig zu aktualisieren und
-zu pflegen. Dies ähnelt der traditionellen Softwareentwicklung, bei der jeder Entwickler
-eine eigene lokale Kopie pflegt und Änderungen überträgt, wenn an einer Funktion
-gearbeitet wird. Im Kontext des Machine Learning ist dies jedoch nicht immer erforderlich,
-da Benutzer möglicherweise nur Gewichte für die Inferenz herunterladen oder Gewichte von
-einem Format in ein anderes konvertieren müssen, ohne das gesamte Repository zu klonen.
- -## HfApi: Ein flexibler und praktischer HTTP-Client - -Die [`HfApi`] Klasse wurde entwickelt, um eine Alternative zu lokalen Git-Repositories -bereitzustellen, die besonders bei der Arbeit mit großen Modellen oder Datensätzen -umständlich zu pflegen sein können. Die [`HfApi`] Klasse bietet die gleiche Funktionalität -wie git-basierte Ansätze, wie das Herunterladen und Hochladen von Dateien sowie das -Erstellen von Branches und Tags, jedoch ohne die Notwendigkeit eines lokalen Ordners, der -synchronisiert werden muss. - -Zusätzlich zu den bereits von `git` bereitgestellten Funktionen bietet die [`HfApi`] -Klasse zusätzliche Features wie die Möglichkeit, Repositories zu verwalten, Dateien mit -Caching für effiziente Wiederverwendung herunterzuladen, im Hub nach Repositories und -Metadaten zu suchen, auf Community-Funktionen wie Diskussionen, Pull Requests und -Kommentare zuzugreifen und Spaces-Hardware und Geheimnisse zu konfigurieren. - -## Was sollte ich verwenden ? Und wann ? - -Insgesamt ist der **HTTP-basierte Ansatz in den meisten Fällen die empfohlene Methode zur Verwendung von** -`huggingface_hub`. Es gibt jedoch einige Situationen, in denen es vorteilhaft sein kann, -eine lokale Git-Kopie (mit [`Repository`]) zu pflegen: -- Wenn Sie ein Modell auf Ihrem Computer trainieren, kann es effizienter sein, einen -herkömmlichen git-basierten Workflow zu verwenden und regelmäßige Updates zu pushen. -[`Repository`] ist für diese Art von Situation mit seiner Fähigkeit zur Hintergrundarbeit optimiert. -- Wenn Sie große Dateien manuell bearbeiten müssen, ist `git` die beste Option, da es nur -die Differenz an den Server sendet. Mit dem [`HfAPI`] Client wird die gesamte Datei bei -jeder Bearbeitung hochgeladen. Beachten Sie jedoch, dass die meisten großen Dateien binär -sind und daher sowieso nicht von Git-Diffs profitieren. - -Nicht alle Git-Befehle sind über [`HfApi`] verfügbar. Einige werden vielleicht nie -implementiert, aber wir bemühen uns ständig, die Lücken zu schließen und zu verbessern. -Wenn Sie Ihren Anwendungsfall nicht abgedeckt sehen, öffnen Sie bitte [ein Issue auf -Github](https://github.com/huggingface/huggingface_hub)! Wir freuen uns über Feedback, um das 🤗-Ökosystem mit und für unsere Benutzer aufzubauen. diff --git a/docs/source/de/guides/inference.md b/docs/source/de/guides/inference.md index 0fbc1ac378..dbb0ea19da 100644 --- a/docs/source/de/guides/inference.md +++ b/docs/source/de/guides/inference.md @@ -8,7 +8,6 @@ Inferenz ist der Prozess, bei dem ein trainiertes Modell verwendet wird, um Vorh - [Inferenz API](https://huggingface.co/docs/api-inference/index): ein Service, der Ihnen ermöglicht, beschleunigte Inferenz auf der Infrastruktur von Hugging Face kostenlos auszuführen. Dieser Service ist eine schnelle Möglichkeit, um anzufangen, verschiedene Modelle zu testen und AI-Produkte zu prototypisieren. - [Inferenz Endpunkte](https://huggingface.co/inference-endpoints/index): ein Produkt zur einfachen Bereitstellung von Modellen im Produktivbetrieb. Die Inferenz wird von Hugging Face in einer dedizierten, vollständig verwalteten Infrastruktur auf einem Cloud-Anbieter Ihrer Wahl durchgeführt. -Diese Dienste können mit dem [`InferenceClient`] Objekt aufgerufen werden. 
Dieser fungiert als Ersatz für den älteren [`InferenceApi`] Client und fügt spezielle Unterstützung für Aufgaben und das Ausführen von Inferenz hinzu, sowohl auf [Inferenz API](https://huggingface.co/docs/api-inference/index) als auch auf [Inferenz Endpunkten](https://huggingface.co/docs/inference-endpoints/index). Im Abschnitt [Legacy InferenceAPI client](#legacy-inferenceapi-client) erfahren Sie, wie Sie zum neuen Client migrieren können. > [!TIP] > [`InferenceClient`] ist ein Python-Client, der HTTP-Anfragen an unsere APIs stellt. Wenn Sie die HTTP-Anfragen direkt mit Ihrem bevorzugten Tool (curl, postman,...) durchführen möchten, lesen Sie bitte die Dokumentationsseiten der [Inferenz API](https://huggingface.co/docs/api-inference/index) oder der [Inferenz Endpunkte](https://huggingface.co/docs/inference-endpoints/index). @@ -77,34 +76,34 @@ Aufrufe, die mit dem [`InferenceClient`] gemacht werden, können mit einem [User Das Ziel von [`InferenceClient`] ist es, die einfachste Schnittstelle zum Ausführen von Inferenzen auf Hugging Face-Modellen bereitzustellen. Es verfügt über eine einfache API, die die gebräuchlichsten Aufgaben unterstützt. Hier ist eine Liste der derzeit unterstützten Aufgaben: -| Domäne | Aufgabe | Unterstützt | Dokumentation | -|--------|--------------------------------|--------------|------------------------------------| -| Audio | [Audio Classification](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | -| | [Automatic Speech Recognition](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | -| | [Text-to-Speech](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | -| Computer Vision | [Image Classification](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | -| | [Image Segmentation](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | -| | [Image-to-Image](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | -| | [Image-to-Text](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | -| | [Object Detection](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | -| | [Text-to-Image](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | -| | [Zero-Shot-Image-Classification](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | -| Multimodal | [Documentation Question Answering](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | -| | [Visual Question Answering](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | -| NLP | [Conversational](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | -| | [Feature Extraction](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | -| | [Fill Mask](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | -| | [Question Answering](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | -| | [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | -| | 
[Summarization](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | -| | [Table Question Answering](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | -| | [Text Classification](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | -| | [Text Generation](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | -| | [Token Classification](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | -| | [Translation](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | -| | [Zero Shot Classification](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | -| Tabular | [Tabular Classification](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | -| | [Tabular Regression](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | +| Domäne | Aufgabe | Unterstützt | Dokumentation | +| --------------- | --------------------------------------------------------------------------------------------- | ----------- | --------------------------------------------------- | +| Audio | [Audio Classification](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | +| | [Automatic Speech Recognition](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | +| | [Text-to-Speech](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | +| Computer Vision | [Image Classification](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | +| | [Image Segmentation](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | +| | [Image-to-Image](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | +| | [Image-to-Text](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | +| | [Object Detection](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | +| | [Text-to-Image](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | +| | [Zero-Shot-Image-Classification](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | +| Multimodal | [Documentation Question Answering](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | +| | [Visual Question Answering](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | +| NLP | [Conversational](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | +| | [Feature Extraction](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | +| | [Fill Mask](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | +| | [Question Answering](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | +| | [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | +| | 
[Summarization](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | +| | [Table Question Answering](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | +| | [Text Classification](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | +| | [Text Generation](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | +| | [Token Classification](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | +| | [Translation](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | +| | [Zero Shot Classification](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | +| Tabular | [Tabular Classification](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | +| | [Tabular Regression](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | > [!TIP] @@ -175,90 +174,3 @@ Einige Aufgaben erfordern binäre Eingaben, zum Beispiel bei der Arbeit mit Bild [{'score': 0.9779096841812134, 'label': 'Blenheim spaniel'}, ...] ``` -## Legacy InferenceAPI client - -Der [`InferenceClient`] dient als Ersatz für den veralteten [`InferenceApi`]-Client. Er bietet spezifische Unterstützung für Aufgaben und behandelt Inferenz sowohl auf der [Inferenz API](https://huggingface.co/docs/api-inference/index) als auch auf den [Inferenz Endpunkten](https://huggingface.co/docs/inference-endpoints/index). - -Hier finden Sie eine kurze Anleitung, die Ihnen hilft, von [`InferenceApi`] zu [`InferenceClient`] zu migrieren. - -### Initialisierung - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN) -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient(model="bert-base-uncased", token=API_TOKEN) -``` - -### Ausführen einer bestimmten Aufgabe - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="paraphrase-xlm-r-multilingual-v1", task="feature-extraction") ->>> inference(...) -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient() ->>> inference.feature_extraction(..., model="paraphrase-xlm-r-multilingual-v1") -``` - -> [!TIP] -> Dies ist der empfohlene Weg, um Ihren Code an [`InferenceClient`] anzupassen. Dadurch können Sie von den aufgabenspezifischen Methoden wie `feature_extraction` profitieren. 
- -### Eigene Anfragen ausführen - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased") ->>> inference(inputs="The goal of life is [MASK].") -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> client = InferenceClient() ->>> response = client.post(json={"inputs": "The goal of life is [MASK]."}, model="bert-base-uncased") ->>> response.json() -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -### Mit Parametern ausführen - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="typeform/distilbert-base-uncased-mnli") ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> inference(inputs, params) -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> client = InferenceClient() ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> response = client.post(json={"inputs": inputs, "parameters": params}, model="typeform/distilbert-base-uncased-mnli") ->>> response.json() -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` diff --git a/docs/source/de/guides/integrations.md b/docs/source/de/guides/integrations.md index 06384c80da..34e9bae3ce 100644 --- a/docs/source/de/guides/integrations.md +++ b/docs/source/de/guides/integrations.md @@ -82,7 +82,7 @@ Obwohl dieser Ansatz flexibel ist, hat er einige Nachteile, insbesondere in Bezu - `token`: zum Herunterladen aus einem privaten Repository - `revision`: zum Herunterladen von einem spezifischen Branch - `cache_dir`: um Dateien in einem spezifischen Verzeichnis zu cachen -- `force_download`/`resume_download`/`local_files_only`: um den Cache wieder zu verwenden oder nicht +- `force_download`/`local_files_only`: um den Cache wieder zu verwenden oder nicht - `api_endpoint`/`proxies`: HTTP-Session konfigurieren Beim Pushen von Modellen werden ähnliche Parameter unterstützt: @@ -202,8 +202,7 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, + proxies: Optional[dict], local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # zusätzliches Argument @@ -221,8 +220,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -242,9 +239,9 @@ Und das war's! 
 Lassen Sie uns die beiden Ansätze, die wir gesehen haben, schnell mit ihren Vor- und Nachteilen zusammenfassen. Die untenstehende Tabelle ist nur indikativ. Ihr Framework könnte einige Besonderheiten haben, die Sie berücksichtigen müssen. Dieser Leitfaden soll nur Richtlinien und Ideen geben, wie Sie die Integration handhaben können. Kontaktieren Sie uns in jedem Fall, wenn Sie Fragen haben!
 
-| Integration | Mit Helfern | Mit [`ModelHubMixin`] |
-|:---:|:---:|:---:|
-| Benutzererfahrung | `model = load_from_hub(...)`<br>`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`<br>`model.push_to_hub(...)` |
-| Flexibilität | Sehr flexibel.<br>Sie haben die volle Kontrolle über die Implementierung. | Weniger flexibel.<br>Ihr Framework muss eine Modellklasse haben. |
-| Wartung | Mehr Wartung, um Unterstützung für Konfiguration und neue Funktionen hinzuzufügen. Könnte auch das Beheben von Benutzerproblemen erfordern. | Weniger Wartung, da die meisten Interaktionen mit dem Hub in `huggingface_hub` implementiert sind. |
-| Dokumentation/Typ-Annotation| Manuell zu schreiben. | Teilweise durch `huggingface_hub` behandelt. |
+| Integration                  | Mit Helfern                                                                                                                                 | Mit [`ModelHubMixin`]                                                                              |
+| :--------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------: |
+| Benutzererfahrung            | `model = load_from_hub(...)`<br>`push_to_hub(model, ...)`                                                                                   | `model = MyModel.from_pretrained(...)`<br>`model.push_to_hub(...)`                                 |
+| Flexibilität                 | Sehr flexibel.<br>Sie haben die volle Kontrolle über die Implementierung.                                                                   | Weniger flexibel.<br>Ihr Framework muss eine Modellklasse haben.                                   |
+| Wartung                      | Mehr Wartung, um Unterstützung für Konfiguration und neue Funktionen hinzuzufügen. Könnte auch das Beheben von Benutzerproblemen erfordern. | Weniger Wartung, da die meisten Interaktionen mit dem Hub in `huggingface_hub` implementiert sind. |
+| Dokumentation/Typ-Annotation | Manuell zu schreiben.                                                                                                                       | Teilweise durch `huggingface_hub` behandelt.                                                       |
diff --git a/docs/source/de/installation.md b/docs/source/de/installation.md
index 3ba965bd4b..a603d25558 100644
--- a/docs/source/de/installation.md
+++ b/docs/source/de/installation.md
@@ -6,7 +6,7 @@ rendered properly in your Markdown viewer.
 
 Bevor Sie beginnen, müssen Sie Ihre Umgebung vorbereiten, indem Sie die entsprechenden Pakete installieren.
 
-`huggingface_hub` wurde für **Python 3.8+** getestet.
+`huggingface_hub` wurde für **Python 3.9+** getestet.
 
 ## Installation mit pip
 
@@ -44,10 +44,6 @@ Einige Abhängigkeiten von `huggingface_hub` sind [optional](https://setuptools.
 Sie können optionale Abhängigkeiten über `pip` installieren:
 
 ```bash
-# Abhängigkeiten für spezifische TensorFlow-Funktionen installieren
-# /!\ Achtung: dies entspricht nicht `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
 # Abhängigkeiten sowohl für torch-spezifische als auch für CLI-spezifische Funktionen installieren.
 pip install 'huggingface_hub[cli,torch]'
 ```
 
 Hier ist die Liste der optionalen Abhängigkeiten in huggingface_hub:
 
 - `cli`: bietet eine komfortablere CLI-Schnittstelle für huggingface_hub.
-- `fastai`, `torch`, `tensorflow`: Abhängigkeiten, um framework-spezifische Funktionen auszuführen.
+- `fastai`, `torch`: Abhängigkeiten, um framework-spezifische Funktionen auszuführen.
 - `dev`: Abhängigkeiten, um zur Bibliothek beizutragen. Enthält `testing` (um Tests auszuführen), `typing` (um den Type Checker auszuführen) und `quality` (um Linters auszuführen).
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 4c03a41c7b..5407e0374a 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -46,6 +46,8 @@
   sections:
     - local: concepts/git_vs_http
       title: Git vs HTTP paradigm
+    - local: concepts/migration
+      title: Migrating to huggingface_hub v1.0
 - title: 'Reference'
   sections:
     - local: package_reference/overview
@@ -54,8 +56,6 @@
       title: Authentication
     - local: package_reference/environment_variables
       title: Environment variables
-    - local: package_reference/repository
-      title: Managing local and online repositories
     - local: package_reference/hf_api
       title: Hugging Face Hub API
     - local: package_reference/file_download
diff --git a/docs/source/en/concepts/git_vs_http.md b/docs/source/en/concepts/git_vs_http.md
index fe5ca4a8fc..49d0370752 100644
--- a/docs/source/en/concepts/git_vs_http.md
+++ b/docs/source/en/concepts/git_vs_http.md
@@ -4,56 +4,28 @@ rendered properly in your Markdown viewer.
 
 # Git vs HTTP paradigm
 
-The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a
-collection of git-based repositories (models, datasets or Spaces). There are two main
-ways to access the Hub using `huggingface_hub`.
-
-The first approach, the so-called "git-based" approach, is led by the [`Repository`] class.
-This method uses a wrapper around the `git` command with additional functions specifically
-designed to interact with the Hub. The second option, called the "HTTP-based" approach,
-involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons
-of each approach.
-
-## Repository: the historical git-based approach
-
-At first, `huggingface_hub` was mostly built around the [`Repository`] class. It provides
-Python wrappers for common `git` commands such as `"git add"`, `"git commit"`, `"git push"`,
-`"git tag"`, `"git checkout"`, etc.
-
-The library also helps with setting credentials and tracking large files, which are often
-used in machine learning repositories. Additionally, the library allows you to execute its
-methods in the background, making it useful for uploading data during training.
-
-The main advantage of using a [`Repository`] is that it allows you to maintain a local
-copy of the entire repository on your machine. This can also be a disadvantage as
-it requires you to constantly update and maintain this local copy. This is similar to
-traditional software development where each developer maintains their own local copy and
-pushes changes when working on a feature. However, in the context of machine learning,
-this may not always be necessary as users may only need to download weights for inference
-or convert weights from one format to another without the need to clone the entire
-repository.
-
-> [!WARNING]
-> [`Repository`] is now deprecated in favor of the http-based alternatives. Given its large adoption in legacy code, the complete removal of [`Repository`] will only happen in release `v1.0`.
+The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a collection of git-based repositories (models, datasets or Spaces). There are two main ways to access the Hub using `huggingface_hub`.
+
+The first approach, the so-called "git-based" approach, relies on using standard `git` commands directly in a terminal. This method allows you to clone repositories, create commits, and push changes manually. The second option, called the "HTTP-based" approach, involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons of each approach.
+
+## Git: the historical CLI-based approach
+
+At first, most users interacted with the Hugging Face Hub using plain `git` commands such as `git clone`, `git add`, `git commit`, `git push`, `git tag`, or `git checkout`.
+
+This approach lets you work with a full local copy of the repository on your machine, just like in traditional software development. This can be an advantage when you need offline access or want to work with the full history of a repository. However, it also comes with downsides: you are responsible for keeping the repository up to date locally, handling credentials, and managing large files (via `git-lfs`), which can become cumbersome when working with large machine learning models or datasets.
+
+In many machine learning workflows, you may only need to download a few files for inference or convert weights without needing to clone the entire repository. In such cases, using `git` can be overkill and introduce unnecessary complexity.
 
 ## HfApi: a flexible and convenient HTTP client
 
-The [`HfApi`] class was developed to provide an alternative to local git repositories, which
-can be cumbersome to maintain, especially when dealing with large models or datasets. The
-[`HfApi`] class offers the same functionality as git-based approaches, such as downloading
-and pushing files and creating branches and tags, but without the need for a local folder
-that needs to be kept in sync.
+The [`HfApi`] class was developed to provide an alternative to using local git repositories, which can be cumbersome to maintain, especially when dealing with large models or datasets. It offers the same functionality as git-based workflows, such as downloading and pushing files and creating branches and tags, but without the need for a local folder that has to be kept in sync.
 
-In addition to the functionalities already provided by `git`, the [`HfApi`] class offers
-additional features, such as the ability to manage repos, download files using caching for
-efficient reuse, search the Hub for repos and metadata, access community features such as
-discussions, PRs, and comments, and configure Spaces hardware and secrets.
+In addition to the functionalities already provided by `git`, the [`HfApi`] class offers additional features, such as the ability to manage repos, download files using caching for efficient reuse, search the Hub for repos and metadata, access community features such as discussions, PRs, and comments, and configure Spaces hardware and secrets.
 
 ## What should I use ? And when ?
 
-Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub`
-in all cases. [`HfApi`] allows to pull and push changes, work with PRs, tags and branches, interact with discussions and much more. Since the `0.16` release, the http-based methods can also run in the background, which was the last major advantage of the [`Repository`] class.
+Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub` in all cases. [`HfApi`] allows you to pull and push changes, work with PRs, tags and branches, interact with discussions and much more.
 
-However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on Github](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the 🤗 ecosystem with and for our users.
+However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on GitHub](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the HF ecosystem with and for our users.
 
-This preference of the http-based [`HfApi`] over the git-based [`Repository`] does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` commands locally in workflows where it makes sense.
+This preference for the HTTP-based [`HfApi`] over direct `git` commands does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` locally in workflows where it makes sense.
\ No newline at end of file
diff --git a/docs/source/en/concepts/migration.md b/docs/source/en/concepts/migration.md
new file mode 100644
index 0000000000..2f60edc53d
--- /dev/null
+++ b/docs/source/en/concepts/migration.md
@@ -0,0 +1,95 @@
+# Migrating to huggingface_hub v1.0
+
+The v1.0 release is a major milestone for the `huggingface_hub` library. It marks our commitment to API stability and the maturity of the library. We have made several improvements and breaking changes to make the library more robust and easier to use.
+
+This guide is intended to help you migrate your existing code to the new version. If you have any questions or feedback, please let us know by [opening an issue on GitHub](https://github.com/huggingface/huggingface_hub/issues).
If you have any questions or feedback, please let us know by [opening an issue on GitHub](https://github.com/huggingface/huggingface_hub/issues).
+
+## Python 3.9+
+
+`huggingface_hub` now requires Python 3.9 or higher. Python 3.8 is no longer supported.
+
+## HTTPX migration
+
+The `huggingface_hub` library now uses [`httpx`](https://www.python-httpx.org/) instead of `requests` for HTTP requests. This change was made to improve performance and to support synchronous and asynchronous requests in the same way. We therefore dropped both the `requests` and `aiohttp` dependencies.
+
+### Breaking changes
+
+This is a major change that affects the entire library. While we have tried to make this change as transparent as possible, you may need to update your code in some cases. Here is a list of breaking changes introduced in the process:
+
+- **Proxy configuration**: "per method" proxies are no longer supported. Proxies must be configured globally using the `HTTP_PROXY` and `HTTPS_PROXY` environment variables.
+- **Custom HTTP backend**: The `configure_http_backend` function has been removed. You should now use [`set_client_factory`] and [`set_async_client_factory`] to configure the HTTP clients.
+- **Error handling**: HTTP errors no longer inherit from `requests.HTTPError` but from `httpx.HTTPError`. We recommend catching `huggingface_hub.HfHubHTTPError`, which is a subclass of `requests.HTTPError` in v0.x and of `httpx.HTTPError` in v1.x. Catching the `huggingface_hub` error ensures your code is compatible with both the old and new versions of the library (see the sketch below).
+- **SSLError**: `httpx` does not have the concept of an `SSLError`. Such failures now surface as a generic `httpx.ConnectError`.
+- **`LocalEntryNotFoundError`**: This error no longer inherits from `HTTPError`. We now define a new `EntryNotFoundError` from which both [`LocalEntryNotFoundError`] (file not found in the local cache) and [`RemoteEntryNotFoundError`] (file not found in the repo on the Hub) inherit. Only the remote error inherits from `HTTPError`.
+- **`InferenceClient`**: The `InferenceClient` can now be used as a context manager. This is especially useful when streaming tokens from a language model to ensure that the connection is closed properly.
+- **`AsyncInferenceClient`**: The `trust_env` parameter has been removed from the `AsyncInferenceClient`'s constructor. Environment variables are trusted by default by `httpx`. If you explicitly don't want to trust the environment, you must configure it with [`set_client_factory`].
+
+For more details, you can check [PR #3328](https://github.com/huggingface/huggingface_hub/pull/3328) that introduced `httpx`.
+
+### Why `httpx`?
+
+The migration from `requests` to `httpx` brings several key improvements that enhance the library's performance, reliability, and maintainability:
+
+**Thread Safety and Connection Reuse**: `httpx` is thread-safe by design, allowing us to safely reuse the same client across multiple threads. This connection reuse reduces the overhead of establishing new connections for each HTTP request, improving performance especially when making frequent requests to the Hub.
+
+**HTTP/2 Support**: `httpx` provides native HTTP/2 support, which offers better efficiency when making multiple requests to the same server (exactly our use case). This translates to lower latency and reduced resource consumption compared to HTTP/1.1.
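+
+To make the error-handling change listed above concrete, here is a minimal sketch of version-agnostic error handling (the repo id is a placeholder, assumed not to exist):
+
+```python
+from huggingface_hub import hf_hub_download
+from huggingface_hub.errors import HfHubHTTPError
+
+try:
+    # Placeholder repo id: any Hub call that can fail with an HTTP error works here.
+    hf_hub_download(repo_id="some-user/some-missing-repo", filename="config.json")
+except HfHubHTTPError as e:
+    # HfHubHTTPError subclasses requests.HTTPError in v0.x and httpx.HTTPError
+    # in v1.x, so this except clause behaves the same on both versions.
+    print(f"Hub request failed: {e}")
+```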
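+
+Similarly, here is a minimal sketch of the connection reuse described above, assuming the [`get_session`] helper documented in the utilities reference:
+
+```python
+from concurrent.futures import ThreadPoolExecutor
+
+from huggingface_hub import get_session
+
+# get_session() returns a shared, thread-safe client, so the concurrent calls
+# below reuse pooled connections instead of opening a new one per request.
+def ping(url: str) -> int:
+    return get_session().get(url).status_code
+
+urls = ["https://huggingface.co/api/models?limit=1"] * 4
+with ThreadPoolExecutor(max_workers=4) as pool:
+    print(list(pool.map(ping, urls)))
+```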
+
+**Unified Sync/Async API**: Unlike our previous setup with separate `requests` (sync) and `aiohttp` (async) dependencies, `httpx` provides both synchronous and asynchronous clients with identical behavior. This ensures that `InferenceClient` and `AsyncInferenceClient` have consistent functionality and eliminates subtle behavioral differences that previously existed between the two implementations.
+
+**Improved SSL Error Handling**: `httpx` handles SSL errors more gracefully, making debugging connection issues easier and more reliable.
+
+**Future-Proof Architecture**: `httpx` is actively maintained and designed for modern Python applications. In contrast, `requests` is in maintenance mode and won't receive major updates like thread-safety improvements or HTTP/2 support.
+
+**Better Environment Variable Handling**: `httpx` provides more consistent handling of environment variables across both sync and async contexts, eliminating previous inconsistencies where `requests` would read local environment variables by default while `aiohttp` would not.
+
+The transition to `httpx` gives `huggingface_hub` a modern, efficient, and maintainable HTTP backend. While most users should experience seamless operation, the underlying improvements provide better performance and reliability for all Hub interactions.
+
+## `Repository` class
+
+The `Repository` class has been removed in v1.0. It was a thin wrapper around the `git` CLI for managing repositories. You can still use `git` directly in the terminal, but the recommended approach is to use the HTTP-based API in the `huggingface_hub` library for a smoother experience, especially when dealing with large files.
+
+Here is a mapping from the legacy `Repository` class to the new `HfApi` one:
+
+| `Repository` method                        | `HfApi` method                                        |
+| ------------------------------------------ | ----------------------------------------------------- |
+| `repo.clone_from`                          | `snapshot_download`                                   |
+| `repo.git_add` + `git_commit` + `git_push` | [`upload_file`], [`upload_folder`], [`create_commit`] |
+| `repo.git_tag`                             | `create_tag`                                          |
+| `repo.git_branch`                          | `create_branch`                                       |
+
+## `HfFolder` class
+
+`HfFolder` was used to manage the user access token. Use [`login`] to save a new token, [`logout`] to delete it, and [`whoami`] to check the user associated with the current token. Finally, use [`get_token`] to retrieve the user's token in a script.
+
+## `InferenceApi` class
+
+`InferenceApi` was a class to interact with the Inference API. It is now recommended to use the [`InferenceClient`] class instead.
+
+## Other deprecated features
+
+Some methods and parameters have been removed in v1.0. The ones listed below were already deprecated with a warning message in v0.x.
+
+- `constants.hf_cache_home` has been removed. Please use `HF_HOME` instead.
+- `use_auth_token` parameters have been removed from all methods. Please use `token` instead.
+- `get_token_permission` method has been removed.
+- `update_repo_visibility` method has been removed. Please use `update_repo_settings` instead.
+- `is_write_action` parameter has been removed from `build_hf_headers`, as well as `write_permission` from `login`. The concept of "write permission" has been removed and is no longer relevant now that fine-grained tokens are the recommended approach.
+- `new_session` parameter in `login` has been renamed to `skip_if_logged_in` for better clarity.
+- `resume_download`, `force_filename`, and `local_dir_use_symlinks` parameters have been removed from `hf_hub_download` and `snapshot_download`.
+- `library`, `language`, `tags`, and `task` parameters have been removed from `list_models`.
+
+## TensorFlow and Keras 2.x support
+
+All TensorFlow-related code and dependencies have been removed in v1.0. This includes the following breaking changes:
+
+- `huggingface_hub[tensorflow]` is no longer a supported extra dependency.
+- The `split_tf_state_dict_into_shards` and `get_tf_storage_size` utility functions have been removed.
+- The `tensorflow`, `fastai`, and `fastcore` versions are no longer included in the built-in headers.
+
+The Keras 2.x integration has also been removed. This includes the `KerasModelHubMixin` class and the `save_pretrained_keras`, `from_pretrained_keras`, and `push_to_hub_keras` utilities. Keras 2.x is a legacy, unmaintained library. The recommended approach is to use Keras 3.x, which is tightly integrated with the Hub (i.e. it has built-in methods to load from and push to the Hub). If you still want to work with Keras 2.x, you should downgrade `huggingface_hub` to a v0.x version.
+
+## `upload_file` and `upload_folder` return values
+
+The [`upload_file`] and [`upload_folder`] functions now return the URL of the commit created on the Hub. Previously, they returned the URL of the file or folder. This aligns them with the return values of [`create_commit`], [`delete_file`], and [`delete_folder`].
\ No newline at end of file
diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md
index 9fe5e7a34a..c15811a538 100644
--- a/docs/source/en/guides/cli.md
+++ b/docs/source/en/guides/cli.md
@@ -17,28 +17,45 @@ First of all, let's install the CLI:

> [!TIP]
> In the snippet above, we also installed the `[cli]` extra dependencies to make the user experience better, especially when using the `cache delete` command.

+Alternatively, you can install the `hf` CLI with a single command:
+
+On macOS and Linux:
+
+```bash
+>>> curl -LsSf https://hf.co/cli/install.sh | sh
+```
+
+On Windows:
+
+```powershell
+>>> powershell -ExecutionPolicy ByPass -c "irm https://hf.co/cli/install.ps1 | iex"
+```
+
Once installed, you can check that the CLI is correctly setup:

```
>>> hf --help
-usage: hf <command> [<args>]
-
-positional arguments:
-  {auth,cache,download,repo,repo-files,upload,upload-large-folder,env,version,lfs-enable-largefiles,lfs-multipart-upload}
-                        hf command helpers
-    auth                Manage authentication (login, logout, etc.).
-    cache               Manage local cache directory.
-    download            Download files from the Hub
-    repo                Manage repos on the Hub.
-    repo-files          Manage files in a repo on the Hub.
-    upload              Upload a file or a folder to the Hub. Recommended for single-commit uploads.
-    upload-large-folder
-                        Upload a large folder to the Hub. Recommended for resumable uploads.
-    env                 Print information about the environment.
-    version             Print information about the hf version.
-
-options:
-  -h, --help            show this help message and exit
+Usage: hf [OPTIONS] COMMAND [ARGS]...
+
+  Hugging Face Hub CLI
+
+Options:
+  --install-completion  Install completion for the current shell.
+  --show-completion     Show completion for the current shell, to copy it or
+                        customize the installation.
+  --help                Show this message and exit.
+
+Commands:
+  auth                 Manage authentication (login, logout, etc.).
+  cache                Manage local cache directory.
+  download             Download files from the Hub.
+  env                  Print information about the environment.
+  jobs                 Run and manage Jobs on the Hub.
+  repo                 Manage repos on the Hub.
+ repo-files Manage files in a repo on the Hub. + upload Upload a file or a folder to the Hub. + upload-large-folder Upload a large folder to the Hub. + version Print information about the hf version. ``` If the CLI is correctly installed, you should see a list of all the options available in the CLI. If you get an error message such as `command not found: hf`, please refer to the [Installation](../installation) guide. @@ -244,6 +261,46 @@ A `.cache/huggingface/` folder is created at the root of your local directory co fuyu/model-00001-of-00002.safetensors ``` +### Dry-run mode + +In some cases, you would like to check which files would be downloaded before actually downloading them. You can check this using the `--dry-run` parameter. It lists all files to download on the repo and checks whether they are already downloaded or not. This gives an idea of how many files have to be downloaded and their sizes. + +```sh +>>> hf download openai-community/gpt2 --dry-run +[dry-run] Fetching 26 files: 100%|█████████████| 26/26 [00:04<00:00, 6.26it/s] +[dry-run] Will download 11 files (out of 26) totalling 5.6G. +File Bytes to download +--------------------------------- ----------------- +.gitattributes - +64-8bits.tflite 125.2M +64-fp16.tflite 248.3M +64.tflite 495.8M +README.md - +config.json - +flax_model.msgpack 497.8M +generation_config.json - +merges.txt - +model.safetensors 548.1M +onnx/config.json - +onnx/decoder_model.onnx 653.7M +onnx/decoder_model_merged.onnx 655.2M +onnx/decoder_with_past_model.onnx 653.7M +onnx/generation_config.json - +onnx/merges.txt - +onnx/special_tokens_map.json - +onnx/tokenizer.json - +onnx/tokenizer_config.json - +onnx/vocab.json - +pytorch_model.bin 548.1M +rust_model.ot 702.5M +tf_model.h5 497.9M +tokenizer.json - +tokenizer_config.json - +vocab.json - +``` + +For more details, check out the [download guide](./download.md#dry-run-mode). + ### Specify cache directory If not using `--local-dir`, all files will be downloaded by default to the cache directory defined by the `HF_HOME` [environment variable](../package_reference/environment_variables#hfhome). You can specify a custom cache using `--cache-dir`: @@ -276,7 +333,7 @@ By default, the `hf download` command will be verbose. It will print details suc On machines with slow connections, you might encounter timeout issues like this one: ```bash -`requests.exceptions.ReadTimeout: (ReadTimeoutError("HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: a33d910c-84c6-4514-8362-c705e2039d38)')` +`httpx.TimeoutException: (TimeoutException("HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: a33d910c-84c6-4514-8362-c705e2039d38)')` ``` To mitigate this issue, you can set the `HF_HUB_DOWNLOAD_TIMEOUT` environment variable to a higher value (default is 10): @@ -430,6 +487,69 @@ By default, the `hf upload` command will be verbose. It will print details such https://huggingface.co/Wauplin/my-cool-model/tree/main ``` +## hf repo + +`hf repo` lets you create, delete, move repositories and update their settings on the Hugging Face Hub. It also includes subcommands to manage branches and tags. + +### Create a repo + +```bash +>>> hf repo create Wauplin/my-cool-model +Successfully created Wauplin/my-cool-model on the Hub. 
+Your repo is now available at https://huggingface.co/Wauplin/my-cool-model +``` + +Create a private dataset or a Space: + +```bash +>>> hf repo create my-cool-dataset --repo-type dataset --private +>>> hf repo create my-gradio-space --repo-type space --space-sdk gradio +``` + +Use `--exist-ok` if the repo may already exist, and `--resource-group-id` to target an Enterprise resource group. + +### Delete a repo + +```bash +>>> hf repo delete Wauplin/my-cool-model +``` + +Datasets and Spaces: + +```bash +>>> hf repo delete my-cool-dataset --repo-type dataset +>>> hf repo delete my-gradio-space --repo-type space +``` + +### Move a repo + +```bash +>>> hf repo move old-namespace/my-model new-namespace/my-model +``` + +### Update repo settings + +```bash +>>> hf repo settings Wauplin/my-cool-model --gated auto +>>> hf repo settings Wauplin/my-cool-model --private true +>>> hf repo settings Wauplin/my-cool-model --private false +``` + +- `--gated`: one of `auto`, `manual`, `false` +- `--private true|false`: set repository privacy + +### Manage branches + +```bash +>>> hf repo branch create Wauplin/my-cool-model dev +>>> hf repo branch create Wauplin/my-cool-model release-1 --revision refs/pr/104 +>>> hf repo branch delete Wauplin/my-cool-model dev +``` + +> [!TIP] +> All commands accept `--repo-type` (one of `model`, `dataset`, `space`) and `--token` if you need to authenticate explicitly. Use `--help` on any command to see all options. + + ## hf repo-files If you want to delete files from a Hugging Face repository, use the `hf repo-files` command. @@ -571,7 +691,6 @@ Copy-and-paste the text below in your GitHub issue. - Who am I ?: Wauplin - Configured git credential helpers: store - FastAI: N/A -- Tensorflow: 2.11.0 - Torch: 1.12.1 - Jinja2: 3.1.2 - Graphviz: 0.20.1 @@ -797,3 +916,34 @@ Manage scheduled jobs using # Delete a scheduled job >>> hf jobs scheduled delete ``` + +## hf endpoints + +Use `hf endpoints` to list, deploy, describe, and manage Inference Endpoints directly from the terminal. The legacy +`hf inference-endpoints` alias remains available for compatibility. + +```bash +# Lists endpoints in your namespace +>>> hf endpoints ls + +# Deploy an endpoint from Model Catalog +>>> hf endpoints catalog deploy --repo openai/gpt-oss-120b --name my-endpoint + +# Deploy an endpoint from the Hugging Face Hub +>>> hf endpoints deploy my-endpoint --repo gpt2 --framework pytorch --accelerator cpu --instance-size x2 --instance-type intel-icl + +# List catalog entries +>>> hf endpoints catalog ls + +# Show status and metadata +>>> hf endpoints describe my-endpoint + +# Pause the endpoint +>>> hf endpoints pause my-endpoint + +# Delete without confirmation prompt +>>> hf endpoints delete my-endpoint --yes +``` + +> [!TIP] +> Add `--namespace` to target an organization, `--token` to override authentication. diff --git a/docs/source/en/guides/download.md b/docs/source/en/guides/download.md index e01fa0ac74..2c5e64157c 100644 --- a/docs/source/en/guides/download.md +++ b/docs/source/en/guides/download.md @@ -158,6 +158,89 @@ Fetching 2 files: 100%|███████████████████ For more details about the CLI download command, please refer to the [CLI guide](./cli#hf-download). +## Dry-run mode + +In some cases, you would like to check which files would be downloaded before actually downloading them. You can check this using the `--dry-run` parameter. It lists all files to download on the repo and checks whether they are already downloaded or not. 
This gives an idea of how many files have to be downloaded and their sizes.
+
+Here is an example, checking a single file:
+
+```sh
+>>> hf download openai-community/gpt2 onnx/decoder_model_merged.onnx --dry-run
+[dry-run] Will download 1 files (out of 1) totalling 655.2M
+File                           Bytes to download
+------------------------------ -----------------
+onnx/decoder_model_merged.onnx 655.2M
+```
+
+And if the file is already cached:
+
+```sh
+>>> hf download openai-community/gpt2 onnx/decoder_model_merged.onnx --dry-run
+[dry-run] Will download 0 files (out of 1) totalling 0.0.
+File                           Bytes to download
+------------------------------ -----------------
+onnx/decoder_model_merged.onnx -
+```
+
+You can also execute a dry-run on an entire repository:
+
+```sh
+>>> hf download openai-community/gpt2 --dry-run
+[dry-run] Fetching 26 files: 100%|█████████████| 26/26 [00:04<00:00, 6.26it/s]
+[dry-run] Will download 11 files (out of 26) totalling 5.6G.
+File                              Bytes to download
+--------------------------------- -----------------
+.gitattributes                    -
+64-8bits.tflite                   125.2M
+64-fp16.tflite                    248.3M
+64.tflite                         495.8M
+README.md                         -
+config.json                       -
+flax_model.msgpack                497.8M
+generation_config.json            -
+merges.txt                        -
+model.safetensors                 548.1M
+onnx/config.json                  -
+onnx/decoder_model.onnx           653.7M
+onnx/decoder_model_merged.onnx    655.2M
+onnx/decoder_with_past_model.onnx 653.7M
+onnx/generation_config.json       -
+onnx/merges.txt                   -
+onnx/special_tokens_map.json      -
+onnx/tokenizer.json               -
+onnx/tokenizer_config.json        -
+onnx/vocab.json                   -
+pytorch_model.bin                 548.1M
+rust_model.ot                     702.5M
+tf_model.h5                       497.9M
+tokenizer.json                    -
+tokenizer_config.json             -
+vocab.json                        -
+```
+
+And with file filtering:
+
+```sh
+>>> hf download openai-community/gpt2 --include "*.json" --dry-run
+[dry-run] Fetching 11 files: 100%|█████████████| 11/11 [00:00<00:00, 80518.92it/s]
+[dry-run] Will download 0 files (out of 11) totalling 0.0.
+File                         Bytes to download
+---------------------------- -----------------
+config.json                  -
+generation_config.json       -
+onnx/config.json             -
+onnx/generation_config.json  -
+onnx/special_tokens_map.json -
+onnx/tokenizer.json          -
+onnx/tokenizer_config.json   -
+onnx/vocab.json              -
+tokenizer.json               -
+tokenizer_config.json        -
+vocab.json                   -
+```
+
+Finally, you can also run a dry-run programmatically by passing `dry_run=True` to [`hf_hub_download`] and [`snapshot_download`]. It returns a [`DryRunFileInfo`] (respectively a list of [`DryRunFileInfo`] objects) describing, for each file, its commit hash, file name, and file size, as well as whether the file is cached and whether it would be downloaded. In practice, a file will be downloaded if it is not cached or if `force_download=True` is passed.
+
## Faster downloads

There are two options to speed up downloads. Both involve installing a Python package written in Rust.

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index 6fa08f2736..8dab419f57 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -11,10 +11,6 @@ The `huggingface_hub` library provides a unified interface to run inference acro
2. [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index): a product to easily deploy models to production. Inference is run by Hugging Face in a dedicated, fully managed infrastructure on a cloud provider of your choice.
3.
Local endpoints: you can also run inference with local inference servers like [llama.cpp](https://github.com/ggerganov/llama.cpp), [Ollama](https://ollama.com/), [vLLM](https://github.com/vllm-project/vllm), [LiteLLM](https://docs.litellm.ai/docs/simple_proxy), or [Text Generation Inference (TGI)](https://github.com/huggingface/text-generation-inference) by connecting the client to these local endpoints. -These services can all be called from the [`InferenceClient`] object. It acts as a replacement for the legacy -[`InferenceApi`] client, adding specific support for tasks and third-party providers. -Learn how to migrate to the new client in the [Legacy InferenceAPI client](#legacy-inferenceapi-client) section. - > [!TIP] > [`InferenceClient`] is a Python client making HTTP calls to our APIs. If you want to make the HTTP calls directly using > your preferred tool (curl, postman,...), please refer to the [Inference Providers](https://huggingface.co/docs/inference-providers/index) documentation diff --git a/docs/source/en/guides/inference_endpoints.md b/docs/source/en/guides/inference_endpoints.md index c89c47621a..1a1d64b8a9 100644 --- a/docs/source/en/guides/inference_endpoints.md +++ b/docs/source/en/guides/inference_endpoints.md @@ -33,6 +33,16 @@ The first step is to create an Inference Endpoint using [`create_inference_endpo ... ) ``` +Or via CLI: + +```bash +hf endpoints deploy my-endpoint-name --repo gpt2 --framework pytorch --accelerator cpu --vendor aws --region us-east-1 --instance-size x2 --instance-type intel-icl --task text-generation + +# Deploy from the catalog with a single command +hf endpoints catalog deploy my-endpoint-name --repo openai/gpt-oss-120b +``` + + In this example, we created a `protected` Inference Endpoint named `"my-endpoint-name"`, to serve [gpt2](https://huggingface.co/gpt2) for `text-generation`. A `protected` Inference Endpoint means your token is required to access the API. We also need to provide additional information to configure the hardware requirements, such as vendor, region, accelerator, instance type, and size. You can check out the list of available resources [here](https://api.endpoints.huggingface.cloud/#/v2%3A%3Aprovider/list_vendors). Alternatively, you can create an Inference Endpoint manually using the [Web interface](https://ui.endpoints.huggingface.co/new) for convenience. Refer to this [guide](https://huggingface.co/docs/inference-endpoints/guides/advanced) for details on advanced settings and their usage. The value returned by [`create_inference_endpoint`] is an [`InferenceEndpoint`] object: @@ -42,6 +52,12 @@ The value returned by [`create_inference_endpoint`] is an [`InferenceEndpoint`] InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +``` + It's a dataclass that holds information about the endpoint. You can access important attributes such as `name`, `repository`, `status`, `task`, `created_at`, `updated_at`, etc. If you need it, you can also access the raw response from the server with `endpoint.raw`. Once your Inference Endpoint is created, you can find it on your [personal dashboard](https://ui.endpoints.huggingface.co/). @@ -101,6 +117,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 [InferenceEndpoint(name='aws-starchat-beta', namespace='huggingface', repository='HuggingFaceH4/starchat-beta', status='paused', url=None), ...] 
``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +hf endpoints ls --namespace huggingface +hf endpoints ls --namespace '*' +``` + ## Check deployment status In the rest of this guide, we will assume that we have a [`InferenceEndpoint`] object called `endpoint`. You might have noticed that the endpoint has a `status` attribute of type [`InferenceEndpointStatus`]. When the Inference Endpoint is deployed and accessible, the status should be `"running"` and the `url` attribute is set: @@ -117,6 +141,12 @@ Before reaching a `"running"` state, the Inference Endpoint typically goes throu InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +``` + Instead of fetching the Inference Endpoint status while waiting for it to run, you can directly call [`~InferenceEndpoint.wait`]. This helper takes as input a `timeout` and a `fetch_every` parameter (in seconds) and will block the thread until the Inference Endpoint is deployed. Default values are respectively `None` (no timeout) and `5` seconds. ```py @@ -189,6 +219,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 # Endpoint is not 'running' but still has a URL and will restart on first call. ``` +Or via CLI: + +```bash +hf endpoints pause my-endpoint-name +hf endpoints resume my-endpoint-name +hf endpoints scale-to-zero my-endpoint-name +``` + ### Update model or hardware requirements In some cases, you might also want to update your Inference Endpoint without creating a new one. You can either update the hosted model or the hardware requirements to run the model. You can do this using [`~InferenceEndpoint.update`]: @@ -207,6 +245,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints update my-endpoint-name --repo gpt2-large +hf endpoints update my-endpoint-name --min-replica 2 --max-replica 6 +hf endpoints update my-endpoint-name --accelerator cpu --instance-size x4 --instance-type intel-icl +``` + ### Delete the endpoint Finally if you won't use the Inference Endpoint anymore, you can simply call [`~InferenceEndpoint.delete()`]. diff --git a/docs/source/en/guides/integrations.md b/docs/source/en/guides/integrations.md index e5ac9aaa87..61dace2df4 100644 --- a/docs/source/en/guides/integrations.md +++ b/docs/source/en/guides/integrations.md @@ -244,8 +244,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # additional argument @@ -265,8 +263,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -428,11 +424,11 @@ Your framework might have some specificities that you need to address. This guid ideas on how to handle integration. In any case, feel free to contact us if you have any questions! -| Integration | Using helpers | Using [`ModelHubMixin`] | -|:---:|:---:|:---:| -| User experience | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| Flexibility | Very flexible.
You fully control the implementation. | Less flexible.
Your framework must have a model class. | -| Maintenance | More maintenance to add support for configuration, and new features. Might also require fixing issues reported by users. | Less maintenance as most of the interactions with the Hub are implemented in `huggingface_hub`. | -| Documentation / Type annotation | To be written manually. | Partially handled by `huggingface_hub`. | -| Download counter | To be handled manually. | Enabled by default if class has a `config` attribute. | -| Model card | To be handled manually | Generated by default with library_name, tags, etc. | +| Integration | Using helpers | Using [`ModelHubMixin`] | +| :-----------------------------: | :----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------: | +| User experience | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| Flexibility | Very flexible.
You fully control the implementation. | Less flexible.
Your framework must have a model class. | +| Maintenance | More maintenance to add support for configuration, and new features. Might also require fixing issues reported by users. | Less maintenance as most of the interactions with the Hub are implemented in `huggingface_hub`. | +| Documentation / Type annotation | To be written manually. | Partially handled by `huggingface_hub`. | +| Download counter | To be handled manually. | Enabled by default if class has a `config` attribute. | +| Model card | To be handled manually | Generated by default with library_name, tags, etc. | diff --git a/docs/source/en/guides/repository.md b/docs/source/en/guides/repository.md index 8937b14e5f..2c47100016 100644 --- a/docs/source/en/guides/repository.md +++ b/docs/source/en/guides/repository.md @@ -41,6 +41,14 @@ Create an empty repository with [`create_repo`] and give it a name with the `rep 'https://huggingface.co/lysandre/test-model' ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-model +Successfully created lysandre/test-model on the Hub. +Your repo is now available at https://huggingface.co/lysandre/test-model +``` + By default, [`create_repo`] creates a model repository. But you can use the `repo_type` parameter to specify another repository type. For example, if you want to create a dataset repository: ```py @@ -49,6 +57,12 @@ By default, [`create_repo`] creates a model repository. But you can use the `rep 'https://huggingface.co/datasets/lysandre/test-dataset' ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-dataset --repo-type dataset +``` + When you create a repository, you can set your repository visibility with the `private` parameter. ```py @@ -56,6 +70,12 @@ When you create a repository, you can set your repository visibility with the `p >>> create_repo("lysandre/test-private", private=True) ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-private --private +``` + If you want to change the repository visibility at a later time, you can use the [`update_repo_settings`] function. > [!TIP] @@ -71,6 +91,12 @@ Specify the `repo_id` of the repository you want to delete: >>> delete_repo(repo_id="lysandre/my-corrupted-dataset", repo_type="dataset") ``` +Or via CLI: + +```bash +>>> hf repo delete lysandre/my-corrupted-dataset --repo-type dataset +``` + ### Duplicate a repository (only for Spaces) In some cases, you want to copy someone else's repo to adapt it to your use case. @@ -111,7 +137,15 @@ You can create new branch and tags using [`create_branch`] and [`create_tag`]: >>> create_tag("bigcode/the-stack", repo_type="dataset", revision="v0.1-release", tag="v0.1.1", tag_message="Bump release version.") ``` -You can use the [`delete_branch`] and [`delete_tag`] functions in the same way to delete a branch or a tag. +Or via CLI: + +```bash +>>> hf repo branch create Matthijs/speecht5-tts-demo handle-dog-speaker --repo-type space +>>> hf repo tag create bigcode/the-stack v0.1.1 --repo-type dataset --revision v0.1-release -m "Bump release version." +``` + +You can use the [`delete_branch`] and [`delete_tag`] functions in the same way to delete a branch or a tag, or `hf repo branch delete` and `hf repo tag delete` respectively in CLI. + ### List all branches and tags @@ -149,6 +183,12 @@ A repository can be public or private. 
A private repository is only visible to y >>> update_repo_settings(repo_id=repo_id, private=True) ``` +Or via CLI: + +```bash +>>> hf repo settings lysandre/test-private --private true +``` + ### Setup gated access To give more control over how repos are used, the Hub allows repo authors to enable **access requests** for their repos. User must agree to share their contact information (username and email address) with the repo authors to access the files when enabled. A repo with access requests enabled is called a **gated repo**. @@ -162,6 +202,12 @@ You can set a repo as gated using [`update_repo_settings`]: >>> api.update_repo_settings(repo_id=repo_id, gated="auto") # Set automatic gating for a model ``` +Or via CLI: + +```bash +>>> hf repo settings lysandre/test-private --gated auto +``` + ### Rename your repository You can rename your repository on the Hub using [`move_repo`]. Using this method, you can also move the repo from a user to @@ -173,81 +219,8 @@ that you should be aware of. For example, you can't transfer your repo to anothe >>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") ``` -## Manage a local copy of your repository - -All the actions described above can be done using HTTP requests. However, in some cases you might be interested in having -a local copy of your repository and interact with it using the Git commands you are familiar with. - -The [`Repository`] class allows you to interact with files and repositories on the Hub with functions similar to Git commands. It is a wrapper over Git and Git-LFS methods to use the Git commands you already know and love. Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions). - -> [!WARNING] -> [`Repository`] is deprecated in favor of the http-based alternatives implemented in [`HfApi`]. Given its large adoption in legacy code, the complete removal of [`Repository`] will only happen in release `v1.0`. For more details, please read [this explanation page](./concepts/git_vs_http). - -### Use a local repository - -Instantiate a [`Repository`] object with a path to a local repository: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### Clone - -The `clone_from` parameter clones a repository from a Hugging Face repository ID to a local directory specified by the `local_dir` argument: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from` can also clone a repository using a URL: - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -You can combine the `clone_from` parameter with [`create_repo`] to create and clone a repository: - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -You can also configure a Git username and email to a cloned repository by specifying the `git_user` and `git_email` parameters when you clone a repository. When users commit to that repository, Git will be aware of the commit author. +Or via CLI: -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... 
) -``` - -### Branch - -Branches are important for collaboration and experimentation without impacting your current files and code. Switch between branches with [`~Repository.git_checkout`]. For example, if you want to switch from `branch1` to `branch2`: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### Pull - -[`~Repository.git_pull`] allows you to update a current local branch with changes from a remote repository: - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -Set `rebase=True` if you want your local commits to occur after your branch is updated with the new commits from the remote: - -```py ->>> repo.git_pull(rebase=True) +```bash +>>> hf repo move Wauplin/cool-model huggingface/cool-model ``` diff --git a/docs/source/en/guides/upload.md b/docs/source/en/guides/upload.md index 24c93fcf6e..6936fbf9b2 100644 --- a/docs/source/en/guides/upload.md +++ b/docs/source/en/guides/upload.md @@ -4,12 +4,7 @@ rendered properly in your Markdown viewer. # Upload files to the Hub -Sharing your files and work is an important aspect of the Hub. The `huggingface_hub` offers several options for uploading your files to the Hub. You can use these functions independently or integrate them into your library, making it more convenient for your users to interact with the Hub. This guide will show you how to push files: - -- without using Git. -- that are very large with [Git LFS](https://git-lfs.github.com/). -- with the `commit` context manager. -- with the [`~Repository.push_to_hub`] function. +Sharing your files and work is an important aspect of the Hub. The `huggingface_hub` offers several options for uploading your files to the Hub. You can use these functions independently or integrate them into your library, making it more convenient for your users to interact with the Hub. Whenever you want to upload files to the Hub, you need to log in to your Hugging Face account. For more details about authentication, check out [this section](../quick-start#authentication). @@ -465,111 +460,3 @@ update of the object is that **the binary content is removed** from it, meaning you don't store another reference to it. This is expected as we don't want to keep in memory the content that is already uploaded. Finally we create the commit by passing all the operations to [`create_commit`]. You can pass additional operations (add, delete or copy) that have not been processed yet and they will be handled correctly. - -## (legacy) Upload files with Git LFS - -All the methods described above use the Hub's API to upload files. This is the recommended way to upload files to the Hub. -However, we also provide [`Repository`], a wrapper around the git tool to manage a local repository. - -> [!WARNING] -> Although [`Repository`] is not formally deprecated, we recommend using the HTTP-based methods described above instead. -> For more details about this recommendation, please have a look at [this guide](../concepts/git_vs_http) explaining the -> core differences between HTTP-based and Git-based approaches. - -Git LFS automatically handles files larger than 10MB. But for very large files (>5GB), you need to install a custom transfer agent for Git LFS: - -```bash -hf lfs-enable-largefiles -``` - -You should install this for each repository that has a very large file. Once installed, you'll be able to push files larger than 5GB. 
- -### commit context manager - -The `commit` context manager handles four of the most common Git commands: pull, add, commit, and push. `git-lfs` automatically tracks any file larger than 10MB. In the following example, the `commit` context manager: - -1. Pulls from the `text-files` repository. -2. Adds a change made to `file.txt`. -3. Commits the change. -4. Pushes the change to the `text-files` repository. - -```python ->>> from huggingface_hub import Repository ->>> with Repository(local_dir="text-files", clone_from="/text-files").commit(commit_message="My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -Here is another example of how to use the `commit` context manager to save and upload a file to a repository: - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model", token=True).commit(commit_message="My cool model :)"): -... torch.save(model.state_dict(), "model.pt") -``` - -Set `blocking=False` if you would like to push your commits asynchronously. Non-blocking behavior is helpful when you want to continue running your script while your commits are being pushed. - -```python ->>> with repo.commit(commit_message="My cool model :)", blocking=False) -``` - -You can check the status of your push with the `command_queue` method: - -```python ->>> last_command = repo.command_queue[-1] ->>> last_command.status -``` - -Refer to the table below for the possible statuses: - -| Status | Description | -| -------- | ------------------------------------ | -| -1 | The push is ongoing. | -| 0 | The push has completed successfully. | -| Non-zero | An error has occurred. | - -When `blocking=False`, commands are tracked, and your script will only exit when all pushes are completed, even if other errors occur in your script. Some additional useful commands for checking the status of a push include: - -```python -# Inspect an error. ->>> last_command.stderr - -# Check whether a push is completed or ongoing. ->>> last_command.is_done - -# Check whether a push command has errored. ->>> last_command.failed -``` - -### push_to_hub - -The [`Repository`] class has a [`~Repository.push_to_hub`] function to add files, make a commit, and push them to a repository. Unlike the `commit` context manager, you'll need to pull from a repository first before calling [`~Repository.push_to_hub`]. - -For example, if you've already cloned a repository from the Hub, then you can initialize the `repo` from the local directory: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="path/to/local/repo") -``` - -Update your local clone with [`~Repository.git_pull`] and then push your file to the Hub: - -```py ->>> repo.git_pull() ->>> repo.push_to_hub(commit_message="Commit my-awesome-file to the Hub") -``` - -However, if you aren't ready to push a file yet, you can use [`~Repository.git_add`] and [`~Repository.git_commit`] to only add and commit your file: - -```py ->>> repo.git_add("path/to/file") ->>> repo.git_commit(commit_message="add my first model config file :)") -``` - -When you're ready, push the file to your repository with [`~Repository.git_push`]: - -```py ->>> repo.git_push() -``` diff --git a/docs/source/en/installation.md b/docs/source/en/installation.md index 9af8a32676..e2c19bb69c 100644 --- a/docs/source/en/installation.md +++ b/docs/source/en/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 
Before you start, you will need to setup your environment by installing the appropriate packages.

-`huggingface_hub` is tested on **Python 3.8+**.
+`huggingface_hub` is tested on **Python 3.9+**.

## Install with pip

@@ -46,17 +46,13 @@ Some dependencies of `huggingface_hub` are [optional](https://setuptools.pypa.io
You can install optional dependencies via `pip`:

```bash
-# Install dependencies for tensorflow-specific features
-# /!\ Warning: this is not equivalent to `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
 # Install dependencies for both torch-specific and CLI-specific features.
 pip install 'huggingface_hub[cli,torch]'
```

Here is the list of optional dependencies in `huggingface_hub`:
- `cli`: provide a more convenient CLI interface for `huggingface_hub`.
-- `fastai`, `torch`, `tensorflow`: dependencies to run framework-specific features.
+- `fastai`, `torch`: dependencies to run framework-specific features.
- `dev`: dependencies to contribute to the lib. Includes `testing` (to run tests), `typing` (to run type checker) and `quality` (to run linters).

@@ -107,6 +103,22 @@ Python will now look inside the folder you cloned to in addition to the normal l
For example, if your Python packages are typically installed in `./.venv/lib/python3.13/site-packages/`, Python will also search the folder you cloned `./huggingface_hub/`.

+## Install the Hugging Face CLI
+
+Use our one-liner installers to set up the `hf` CLI without touching your Python environment:
+
+On macOS and Linux:
+
+```bash
+curl -LsSf https://hf.co/cli/install.sh | sh
+```
+
+On Windows:
+
+```powershell
+powershell -ExecutionPolicy ByPass -c "irm https://hf.co/cli/install.ps1 | iex"
+```
+
## Install with conda

If you are more familiar with it, you can install `huggingface_hub` using the [conda-forge channel](https://anaconda.org/conda-forge/huggingface_hub):

diff --git a/docs/source/en/package_reference/dataclasses.md b/docs/source/en/package_reference/dataclasses.md
index 3691a2b40e..6575881db9 100644
--- a/docs/source/en/package_reference/dataclasses.md
+++ b/docs/source/en/package_reference/dataclasses.md
@@ -188,6 +188,14 @@ The `@strict` decorator enhances a dataclass with strict validation.

[[autodoc]] dataclasses.strict

+### `validate_typed_dict`
+
+Method to validate that a dictionary conforms to the types defined in a `TypedDict` class.
+
+This is the equivalent of dataclass validation, but for `TypedDict`s. Since typed dicts are never instantiated (they are only used by static type checkers), the validation step must be called manually.
+
+[[autodoc]] dataclasses.validate_typed_dict
+
### `as_validated_field`

Decorator to create a [`validated_field`]. Recommended for fields with a single validator to avoid boilerplate code.

diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md
index d0b32fb7e8..249a106454 100644
--- a/docs/source/en/package_reference/environment_variables.md
+++ b/docs/source/en/package_reference/environment_variables.md
@@ -179,7 +179,7 @@ Set to disable using `hf-xet`, even if it is available in your Python environmen

Set to `True` for faster uploads and downloads from the Hub using `hf_transfer`.

-By default, `huggingface_hub` uses the Python-based `requests.get` and `requests.post` functions.
+By default, `huggingface_hub` uses the Python-based `httpx.get` and `httpx.post` functions.
Although these are reliable and versatile, they may not be the most efficient choice for machines with high bandwidth.
[`hf_transfer`](https://github.com/huggingface/hf_transfer) is a Rust-based package developed to diff --git a/docs/source/en/package_reference/hf_api.md b/docs/source/en/package_reference/hf_api.md index 99ce0c2905..07b039e02e 100644 --- a/docs/source/en/package_reference/hf_api.md +++ b/docs/source/en/package_reference/hf_api.md @@ -45,6 +45,10 @@ models = hf_api.list_models() [[autodoc]] huggingface_hub.hf_api.DatasetInfo +### DryRunFileInfo + +[[autodoc]] huggingface_hub.hf_api.DryRunFileInfo + ### GitRefInfo [[autodoc]] huggingface_hub.hf_api.GitRefInfo diff --git a/docs/source/en/package_reference/inference_client.md b/docs/source/en/package_reference/inference_client.md index eae0edc755..1a92641077 100644 --- a/docs/source/en/package_reference/inference_client.md +++ b/docs/source/en/package_reference/inference_client.md @@ -34,16 +34,3 @@ pip install --upgrade huggingface_hub[inference] ## InferenceTimeoutError [[autodoc]] InferenceTimeoutError - -## InferenceAPI - -[`InferenceAPI`] is the legacy way to call the Inference API. The interface is more simplistic and requires knowing -the input parameters and output format for each task. It also lacks the ability to connect to other services like -Inference Endpoints or AWS SageMaker. [`InferenceAPI`] will soon be deprecated so we recommend using [`InferenceClient`] -whenever possible. Check out [this guide](../guides/inference#legacy-inferenceapi-client) to learn how to switch from -[`InferenceAPI`] to [`InferenceClient`] in your scripts. - -[[autodoc]] InferenceApi - - __init__ - - __call__ - - all diff --git a/docs/source/en/package_reference/mixins.md b/docs/source/en/package_reference/mixins.md index 42c253e710..c725306efe 100644 --- a/docs/source/en/package_reference/mixins.md +++ b/docs/source/en/package_reference/mixins.md @@ -21,16 +21,6 @@ how to integrate any ML framework with the Hub. [[autodoc]] PyTorchModelHubMixin -### Keras - -[[autodoc]] KerasModelHubMixin - -[[autodoc]] from_pretrained_keras - -[[autodoc]] push_to_hub_keras - -[[autodoc]] save_pretrained_keras - ### Fastai [[autodoc]] from_pretrained_fastai diff --git a/docs/source/en/package_reference/repository.md b/docs/source/en/package_reference/repository.md deleted file mode 100644 index de7851d6a9..0000000000 --- a/docs/source/en/package_reference/repository.md +++ /dev/null @@ -1,51 +0,0 @@ - - -# Managing local and online repositories - -The `Repository` class is a helper class that wraps `git` and `git-lfs` commands. It provides tooling adapted -for managing repositories which can be very large. - -It is the recommended tool as soon as any `git` operation is involved, or when collaboration will be a point -of focus with the repository itself. - -## The Repository class - -[[autodoc]] Repository - - __init__ - - current_branch - - all - -## Helper methods - -[[autodoc]] huggingface_hub.repository.is_git_repo - -[[autodoc]] huggingface_hub.repository.is_local_clone - -[[autodoc]] huggingface_hub.repository.is_tracked_with_lfs - -[[autodoc]] huggingface_hub.repository.is_git_ignored - -[[autodoc]] huggingface_hub.repository.files_to_be_staged - -[[autodoc]] huggingface_hub.repository.is_tracked_upstream - -[[autodoc]] huggingface_hub.repository.commits_to_push - -## Following asynchronous commands - -The `Repository` utility offers several methods which can be launched asynchronously: -- `git_push` -- `git_pull` -- `push_to_hub` -- The `commit` context manager - -See below for utilities to manage such asynchronous methods. 
- -[[autodoc]] Repository - - commands_failed - - commands_in_progress - - wait_for_commands - -[[autodoc]] huggingface_hub.repository.CommandInProgress diff --git a/docs/source/en/package_reference/serialization.md b/docs/source/en/package_reference/serialization.md index 0022c72aed..dab6b5d771 100644 --- a/docs/source/en/package_reference/serialization.md +++ b/docs/source/en/package_reference/serialization.md @@ -128,11 +128,7 @@ If you want to save a state dictionary (e.g. a mapping between layer names and r [[autodoc]] huggingface_hub.save_torch_state_dict -The `serialization` module also contains low-level helpers to split a state dictionary into several shards, while creating a proper index in the process. These helpers are available for `torch` and `tensorflow` tensors and are designed to be easily extended to any other ML frameworks. - -### split_tf_state_dict_into_shards - -[[autodoc]] huggingface_hub.split_tf_state_dict_into_shards +The `serialization` module also contains low-level helpers to split a state dictionary into several shards, while creating a proper index in the process. These helpers are available for `torch` tensors and are designed to be easily extended to any other ML frameworks. ### split_torch_state_dict_into_shards @@ -156,7 +152,6 @@ The loading helpers support both single-file and sharded checkpoints in either s [[autodoc]] huggingface_hub.load_state_dict_from_file - ## Tensors helpers ### get_torch_storage_id diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index 80fe3148ff..2b66c260d1 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -120,23 +120,40 @@ You can also enable or disable progress bars for specific groups. This allows yo [[autodoc]] huggingface_hub.utils.enable_progress_bars -## Configure HTTP backend +## Configuring the HTTP Backend -In some environments, you might want to configure how HTTP calls are made, for example if you are using a proxy. -`huggingface_hub` let you configure this globally using [`configure_http_backend`]. All requests made to the Hub will -then use your settings. Under the hood, `huggingface_hub` uses `requests.Session` so you might want to refer to the -[`requests` documentation](https://requests.readthedocs.io/en/latest/user/advanced) to learn more about the available -parameters. + -Since `requests.Session` is not guaranteed to be thread-safe, `huggingface_hub` creates one session instance per thread. -Using sessions allows us to keep the connection open between HTTP calls and ultimately save time. If you are -integrating `huggingface_hub` in a third-party library and wants to make a custom call to the Hub, use [`get_session`] -to get a Session configured by your users (i.e. replace any `requests.get(...)` call by `get_session().get(...)`). +In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via `configure_http_backend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. -[[autodoc]] configure_http_backend + + + +In some setups, you may need to control how HTTP requests are made, for example when working behind a proxy. 
The `huggingface_hub` library allows you to configure this globally with [`set_client_factory`]. After configuration, all requests to the Hub will use your custom settings. Since `huggingface_hub` relies on `httpx.Client` under the hood, you can check the [`httpx` documentation](https://www.python-httpx.org/advanced/clients/) for details on available parameters. + +If you are building a third-party library and need to make direct requests to the Hub, use [`get_session`] to obtain a correctly configured `httpx` client. Replace any direct `httpx.get(...)` calls with `get_session().get(...)` to ensure proper behavior. + +[[autodoc]] set_client_factory [[autodoc]] get_session +In rare cases, you may want to manually close the current session (for example, after a transient `SSLError`). You can do this with [`close_session`]. A new session will automatically be created on the next call to [`get_session`]. + +Sessions are always closed automatically when the process exits. + +[[autodoc]] close_session + +For async code, use [`set_async_client_factory`] to configure an `httpx.AsyncClient` and [`get_async_session`] to retrieve one. + +[[autodoc]] set_async_client_factory + +[[autodoc]] get_async_session + + + +Unlike the synchronous client, the lifecycle of the async client is not managed automatically. Use an async context manager to handle it properly. + + ## Handle HTTP errors @@ -177,35 +194,39 @@ Here is a list of HTTP errors thrown in `huggingface_hub`. the server response and format the error message to provide as much information to the user as possible. -[[autodoc]] huggingface_hub.utils.HfHubHTTPError +[[autodoc]] huggingface_hub.errors.HfHubHTTPError #### RepositoryNotFoundError -[[autodoc]] huggingface_hub.utils.RepositoryNotFoundError +[[autodoc]] huggingface_hub.errors.RepositoryNotFoundError #### GatedRepoError -[[autodoc]] huggingface_hub.utils.GatedRepoError +[[autodoc]] huggingface_hub.errors.GatedRepoError #### RevisionNotFoundError -[[autodoc]] huggingface_hub.utils.RevisionNotFoundError +[[autodoc]] huggingface_hub.errors.RevisionNotFoundError + +#### BadRequestError + +[[autodoc]] huggingface_hub.errors.BadRequestError #### EntryNotFoundError -[[autodoc]] huggingface_hub.utils.EntryNotFoundError +[[autodoc]] huggingface_hub.errors.EntryNotFoundError -#### BadRequestError +#### RemoteEntryNotFoundError -[[autodoc]] huggingface_hub.utils.BadRequestError +[[autodoc]] huggingface_hub.errors.RemoteEntryNotFoundError #### LocalEntryNotFoundError -[[autodoc]] huggingface_hub.utils.LocalEntryNotFoundError +[[autodoc]] huggingface_hub.errors.LocalEntryNotFoundError #### OfflineModeIsEnabled -[[autodoc]] huggingface_hub.utils.OfflineModeIsEnabled +[[autodoc]] huggingface_hub.errors.OfflineModeIsEnabled ## Telemetry @@ -251,20 +272,6 @@ huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in rep >>> my_cool_method(repo_id="other..repo..id") huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'. - ->>> @validate_hf_hub_args -... def my_cool_auth_method(token: str): -... print(token) - ->>> my_cool_auth_method(token="a token") -"a token" - ->>> my_cool_auth_method(use_auth_token="a use_auth_token") -"a use_auth_token" - ->>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token") -UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token` value will be ignored. -"a token" ``` #### validate_hf_hub_args @@ -284,8 +291,8 @@ validated. 
[[autodoc]] utils.validate_repo_id -#### smoothly_deprecate_use_auth_token +#### smoothly_deprecate_legacy_arguments Not exactly a validator, but ran as well. -[[autodoc]] utils.smoothly_deprecate_use_auth_token +[[autodoc]] utils._validators.smoothly_deprecate_legacy_arguments diff --git a/docs/source/fr/_toctree.yml b/docs/source/fr/_toctree.yml index f6c76ff6f5..d9ed776e0a 100644 --- a/docs/source/fr/_toctree.yml +++ b/docs/source/fr/_toctree.yml @@ -6,10 +6,6 @@ title: Démarrage rapide - local: installation title: Installation -- title: "Concepts" - sections: - - local: concepts/git_vs_http - title: Git ou HTTP? - title: "Guides" sections: - local: guides/integrations diff --git a/docs/source/fr/concepts/git_vs_http.md b/docs/source/fr/concepts/git_vs_http.md deleted file mode 100644 index 678119d48f..0000000000 --- a/docs/source/fr/concepts/git_vs_http.md +++ /dev/null @@ -1,64 +0,0 @@ - - -# Git ou HTTP? - -`huggingface_hub` est une librairie qui permet d'interagir avec le Hugging Face Hub, -qui est une collection de dépots Git (modèles, datasets ou spaces). -Il y a deux manières principales pour accéder au Hub en utilisant `huggingface_hub`. - -La première approche, basée sur Git, appelée approche "git-based", est rendue possible par la classe [`Repository`]. -Cette méthode utilise un wrapper autour de la commande `git` avec des fonctionnalités supplémentaires conçues pour interagir avec le Hub. La deuxième option, appelée approche "HTTP-based" , consiste à faire des requêtes HTTP en utilisant le client [`HfApi`]. Examinons -les avantages et les inconvénients de ces deux méthodes. - -## Repository: l'approche historique basée sur git - -Initialement, `huggingface_hub` était principalement construite autour de la classe [`Repository`]. Elle fournit des -wrappers Python pour les commandes `git` usuelles, telles que `"git add"`, `"git commit"`, `"git push"`, -`"git tag"`, `"git checkout"`, etc. - -Cette librairie permet aussi de gérer l'authentification et les fichiers volumineux, souvent présents dans les dépôts Git de machine learning. De plus, ses méthodes sont exécutables en arrière-plan, ce qui est utile pour upload des données durant l'entrainement d'un modèle. - -L'avantage principal de l'approche [`Repository`] est qu'elle permet de garder une -copie en local du dépot Git sur votre machine. Cela peut aussi devenir un désavantage, -car cette copie locale doit être mise à jour et maintenue constamment. C'est une méthode -analogue au développement de logiciel classique où chaque développeur maintient sa propre copie locale -et push ses changements lorsqu'il travaille sur une nouvelle fonctionnalité. -Toutefois, dans le contexte du machine learning la taille des fichiers rend peu pertinente cette approche car -les utilisateurs ont parfois besoin d'avoir -uniquement les poids des modèles pour l'inférence ou de convertir ces poids d'un format à un autre sans avoir à cloner -tout le dépôt. - -> [!WARNING] -> [`Repository`] est maintenant obsolète et remplacée par les alternatives basées sur des requêtes HTTP. Étant donné son adoption massive par les utilisateurs, -> la suppression complète de [`Repository`] ne sera faite que pour la version `v1.0`. - -## HfApi: Un client HTTP plus flexible - -La classe [`HfApi`] a été développée afin de fournir une alternative aux dépôts git locaux, -qui peuvent être encombrant à maintenir, en particulier pour des modèles ou datasets volumineux. 
-La classe [`HfApi`] offre les mêmes fonctionnalités que les approches basées sur Git, -telles que le téléchargement et le push de fichiers ainsi que la création de branches et de tags, mais sans -avoir besoin d'un fichier local qui doit être constamment synchronisé. - -En plus des fonctionnalités déjà fournies par `git`, La classe [`HfApi`] offre des fonctionnalités -additionnelles, telles que la capacité à gérer des dépôts, le téléchargement des fichiers -dans le cache (permettant une réutilisation), la recherche dans le Hub pour trouver -des dépôts et des métadonnées, l'accès aux fonctionnalités communautaires telles que, les discussions, -les pull requests et les commentaires. - -## Quelle méthode utiliser et quand ? - -En général, **l'approche HTTP est la méthode recommandée** pour utiliser `huggingface_hub` -[`HfApi`] permet de pull et push des changements, de travailler avec les pull requests, les tags et les branches, l'interaction avec les discussions -et bien plus encore. Depuis la version `0.16`, les méthodes HTTP-based peuvent aussi être exécutées en arrière-plan, ce qui constituait le -dernier gros avantage de la classe [`Repository`]. - -Toutefois, certaines commandes restent indisponibles en utilisant [`HfApi`]. -Peut être que certaines ne le seront jamais, mais nous essayons toujours de réduire le fossé entre ces deux approches. -Si votre cas d'usage n'est pas couvert, nous serions ravis de vous aider. Pour cela, ouvrez -[une issue sur Github](https://github.com/huggingface/huggingface_hub)! Nous écoutons tous les retours nous permettant de construire -l'écosystème 🤗 avec les utilisateurs et pour les utilisateurs. - -Cette préférence pour l'approche basée sur [`HfApi`] plutôt que [`Repository`] ne signifie pas que les dépôts stopperons d'être versionnés avec git sur le Hugging Face Hub. Il sera toujours possible d'utiliser les commandes `git` en local lorsque nécessaire. \ No newline at end of file diff --git a/docs/source/fr/guides/integrations.md b/docs/source/fr/guides/integrations.md index 5a9736667f..20dff4a73f 100644 --- a/docs/source/fr/guides/integrations.md +++ b/docs/source/fr/guides/integrations.md @@ -223,8 +223,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # argument supplémentaire @@ -242,8 +240,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -266,9 +262,9 @@ est ici pour vous donner des indications et des idées sur comment gérer l'int n'hésitez pas à nous contacter si vous avez une question ! -| Intégration | Utilisant des helpers | Utilisant [`ModelHubMixin`] | -|:---:|:---:|:---:| -| Expérience utilisateur | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| Flexible | Très flexible.
Vous controllez complètement l'implémentation. | Moins flexible.
Votre framework doit avoir une classe de modèle. | -| Maintenance | Plus de maintenance pour ajouter du support pour la configuration, et de nouvelles fonctionnalités. Peut aussi nécessiter de fixx des problèmes signalés par les utilisateurs.| Moins de maintenance vu que la plupart des intégrations avec le Hub sont implémentés dans `huggingface_hub` | -| Documentation / Anotation de type| A écrire à la main | Géré partiellement par `huggingface_hub`. | +| Intégration | Utilisant des helpers | Utilisant [`ModelHubMixin`] | +| :-------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | +| Expérience utilisateur | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| Flexible | Très flexible.
Vous contrôlez complètement l'implémentation. | Moins flexible.
Votre framework doit avoir une classe de modèle. | +| Maintenance | Plus de maintenance pour ajouter du support pour la configuration, et de nouvelles fonctionnalités. Peut aussi nécessiter de corriger des problèmes signalés par les utilisateurs. | Moins de maintenance vu que la plupart des intégrations avec le Hub sont implémentées dans `huggingface_hub` | +| Documentation / Annotation de type | À écrire à la main | Géré partiellement par `huggingface_hub`. | diff --git a/docs/source/fr/installation.md b/docs/source/fr/installation.md index eb4b2ee9b4..fe3a279102 100644 --- a/docs/source/fr/installation.md +++ b/docs/source/fr/installation.md @@ -7,7 +7,7 @@ rendered properly in your Markdown viewer. Avant de commencer, vous allez avoir besoin de préparer votre environnement en installant les packages appropriés. -`huggingface_hub` est testée sur **Python 3.8+**. +`huggingface_hub` est testée sur **Python 3.9+**. ## Installation avec pip @@ -48,17 +48,13 @@ Toutefois, certaines fonctionnalités de `huggingface_hub` ne seront pas disponi Vous pouvez installer des dépendances optionnelles via `pip`: ```bash -#Installation des dépendances pour les fonctionnalités spécifiques à Tensorflow. -#/!\ Attention : cette commande n'est pas équivalente à `pip install tensorflow`. -pip install 'huggingface_hub[tensorflow]' - #Installation des dépendances spécifiques à Pytorch et au CLI. pip install 'huggingface_hub[cli,torch]' ``` Voici une liste des dépendances optionnelles dans `huggingface_hub`: - `cli` fournit une interface d'invite de commande plus pratique pour `huggingface_hub`. -- `fastai`, `torch` et `tensorflow` sont des dépendances pour utiliser des fonctionnalités spécifiques à un framework. +- `fastai` et `torch` sont des dépendances pour utiliser des fonctionnalités spécifiques à un framework. - `dev` permet de contribuer à la librairie. Cette dépendance inclut `testing` (pour lancer des tests), `typing` (pour lancer le vérifieur de type) et `quality` (pour lancer des linters).
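To ground the [`ModelHubMixin`] column of the integration comparison above, here is a minimal sketch of the mixin-based route using `PyTorchModelHubMixin`, as shown earlier in that guide. The model class is a toy and the repo id is a placeholder:

```python
import torch

from huggingface_hub import PyTorchModelHubMixin


class MyModel(torch.nn.Module, PyTorchModelHubMixin):
    # Inheriting the mixin adds from_pretrained() / push_to_hub() to the class;
    # simple __init__ kwargs like hidden_size are serialized into the config.
    def __init__(self, hidden_size: int = 16):
        super().__init__()
        self.linear = torch.nn.Linear(hidden_size, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)


model = MyModel(hidden_size=32)
model.push_to_hub("username/my-test-model")  # placeholder repo id
reloaded = MyModel.from_pretrained("username/my-test-model")
```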
diff --git a/docs/source/hi/_toctree.yml b/docs/source/hi/_toctree.yml index 5b9e412c50..f8b3606536 100644 --- a/docs/source/hi/_toctree.yml +++ b/docs/source/hi/_toctree.yml @@ -6,7 +6,3 @@ title: जल्दी शुरू - local: installation title: इंस्टालेशन -- title: "संकल्पना मार्गदर्शिकाएँ" - sections: - - local: concepts/git_vs_http - title: "संकल्पनाएँ/गिट_बनाम_एचटीटीपी" diff --git a/docs/source/hi/concepts/git_vs_http.md b/docs/source/hi/concepts/git_vs_http.md deleted file mode 100644 index 8a4e2a625a..0000000000 --- a/docs/source/hi/concepts/git_vs_http.md +++ /dev/null @@ -1,30 +0,0 @@ -# Git vs HTTP पैराडाइम - -`huggingface_hub` लाइब्रेरी Hugging Face Hub के साथ आदान-प्रदान करने के लिए एक लाइब्रेरी है, जो git-आधारित repositories (models, datasets या Spaces) का एक संग्रह है। `huggingface_hub` का उपयोग करके Hub तक पहुंचने के दो मुख्य तरीके हैं। - -पहला तरीका, जिसे "git-आधारित" तरीका कहा जाता है, [`Repository`] क्लास द्वारा संचालित है। यह विधि `git` कमांड के चारों ओर एक आवरण का उपयोग करती है जिसमें Hub के साथ आदान-प्रदान करने के लिए विशेष रूप से डिज़ाइन किए गए अतिरिक्त functions हैं। दूसरा विकल्प, जिसे "HTTP-आधारित" तरीका कहा जाता है, [`HfApi`] client का उपयोग करके HTTP requests बनाने में शामिल है। आइए प्रत्येक तरीका के फायदे और नुकसान की जांच करते हैं। - -## Repository: ऐतिहासिक git-आधारित तरीका - -शुरुआत में, `huggingface_hub` मुख्य रूप से [`Repository`] क्लास के चारों ओर बनाया गया था। यह सामान्य `git` कमांड जैसे `"git add"`, `"git commit"`, `"git push"`, `"git tag"`, `"git checkout"`, आदि के लिए Python wrappers प्रदान करता है। - -लाइब्रेरी विवरण सेट करने और बड़ी फाइलों को track करने में भी मदद करती है, जो अक्सर machine learning repositories में उपयोग की जाती हैं। इसके अतिरिक्त, लाइब्रेरी आपको अपनी विधियों को पृष्ठभूमि में कार्यान्वित करने की अनुमति देती है, जो training के दौरान डेटा अपलोड करने के लिए उपयोगी है। - -[`Repository`] का उपयोग करने का मुख्य फायदा यह है कि यह आपको अपनी मशीन पर संपूर्ण repository की एक local copy बनाए रखने की अनुमति देता है। यह एक नुकसान भी हो सकता है क्योंकि इसके लिए आपको इस local copy को लगातार update और maintain करना होता है। यह पारंपरिक software development के समान है जहां प्रत्येक developer अपनी स्वयं की local copy maintain करता है और feature पर काम करते समय changes push करता है। हालांकि, machine learning के संदर्भ में, यह हमेशा आवश्यक नहीं हो सकता क्योंकि users को केवल inference के लिए weights download करने या weights को एक format से दूसरे में convert करने की आवश्यकता हो सकती है, बिना पूरी repository को clone करने की आवश्यकता के। - -> [!WARNING] -> [`Repository`] अब http-आधारित विकल्पों के पक्ष में deprecated है। legacy code में इसकी बड़ी अपनाई जाने के कारण, [`Repository`] का पूर्ण removal केवल `v1.0` release में होगा। - -## HfApi: एक लचीला और सुविधाजनक HTTP client - -[`HfApi`] क्लास को local git repositories का एक विकल्प प्रदान करने के लिए विकसित किया गया था, जो maintain करना मुश्किल हो सकता है, विशेष रूप से बड़े models या datasets के साथ व्यवहार करते समय। [`HfApi`] क्लास git-आधारित तरीकाों की समान functionality प्रदान करती है, जैसे files download और push करना और branches तथा tags बनाना, लेकिन एक local folder की आवश्यकता के बिना जिसे sync में रखना पड़ता है। - -`git` द्वारा पहले से प्रदान की गई functionalities के अलावा, [`HfApi`] क्लास अतिरिक्त features प्रदान करती है, जैसे repos manage करने की क्षमता, efficient reuse के लिए caching का उपयोग करके files download करना, repos और metadata के लिए Hub को search करना, discussions, PRs, और comments जैसी community features तक पहुंच, और Spaces hardware और secrets को configure करना। - -## मुझे क्या उपयोग करना चाहिए? 
और कब? - -कुल मिलाकर, **HTTP-आधारित तरीका सभी cases में** `huggingface_hub` का उपयोग करने का **अनुशंसित तरीका है**। [`HfApi`] changes को pull और push करने, PRs, tags और branches के साथ काम करने, discussions के साथ interact करने और बहुत कुछ करने की अनुमति देता है। `0.16` release के बाद से, http-आधारित methods भी पृष्ठभूमि में चल सकती हैं, जो [`Repository`] क्लास का अंतिम प्रमुख फायदा था। - -हालांकि, सभी git commands [`HfApi`] के माध्यम से उपलब्ध नहीं हैं। कुछ को कभी भी implement नहीं किया जा सकता है, लेकिन हम हमेशा सुधार करने और gap को बंद करने की कोशिश कर रहे हैं। यदि आपको अपना use case covered नहीं दिखता है, तो कृपया [Github पर एक issue खोलें](https://github.com/huggingface/huggingface_hub)! हम अपने users के साथ और उनके लिए 🤗 ecosystem बनाने में मदद करने के लिए feedback का स्वागत करते हैं। - -git-आधारित [`Repository`] पर http-आधारित [`HfApi`] की यह प्राथमिकता का मतलब यह नहीं है कि git versioning Hugging Face Hub से जल्द ही गायब हो जाएगी। workflows में जहां यह समझ में आता है, वहां `git` commands का locally उपयोग करना हमेशा संभव होगा। \ No newline at end of file diff --git a/docs/source/hi/installation.md b/docs/source/hi/installation.md index 1659e85fd7..91d3702059 100644 --- a/docs/source/hi/installation.md +++ b/docs/source/hi/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. आरंभ करने से पहले, आपको उपयुक्त पैकेज स्थापित करके अपना परिवेश सेटअप करना होगा। -`huggingface_hub` का परीक्षण **Python 3.8+** पर किया गया है। +`huggingface_hub` का परीक्षण **Python 3.9+** पर किया गया है। ## पिप के साथ स्थापित करें @@ -46,17 +46,13 @@ pip install --upgrade huggingface_hub आप `pip` के माध्यम से वैकल्पिक निर्भरताएँ स्थापित कर सकते हैं: ```bash -# Install dependencies for tensorflow-specific features -# /!\ Warning: this is not equivalent to `pip install tensorflow` -pip install 'huggingface_hub[tensorflow]' - # Install dependencies for both torch-specific and CLI-specific features. 
pip install 'huggingface_hub[cli,torch]' ``` यहां `huggingface_hub` में वैकल्पिक निर्भरताओं की सूची दी गई है: - `cli`: `huggingface_hub` के लिए अधिक सुविधाजनक CLI इंटरफ़ेस प्रदान करें। -- `fastai`, `torch`, `tensorflow`: फ्रेमवर्क-विशिष्ट सुविधाओं को चलाने के लिए निर्भरताएँ। +- `fastai`, `torch`: फ्रेमवर्क-विशिष्ट सुविधाओं को चलाने के लिए निर्भरताएँ। - `dev`: lib में योगदान करने के लिए निर्भरताएँ। इसमें 'परीक्षण' (परीक्षण चलाने के लिए), 'टाइपिंग' (टाइप चेकर चलाने के लिए) और 'गुणवत्ता' (लिंटर चलाने के लिए) शामिल हैं। diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml index 2c7a4da702..e67d69af38 100644 --- a/docs/source/ko/_toctree.yml +++ b/docs/source/ko/_toctree.yml @@ -18,8 +18,6 @@ title: 명령줄 인터페이스(CLI) 사용하기 - local: guides/hf_file_system title: Hf파일시스템 - - local: guides/repository - title: 리포지토리 - local: guides/search title: Hub에서 검색하기 - local: guides/inference @@ -40,10 +38,6 @@ title: 라이브러리 통합 - local: guides/webhooks_server title: 웹훅 서버 -- title: "개념 가이드" - sections: - - local: concepts/git_vs_http - title: Git 대 HTTP 패러다임 - title: "라이브러리 레퍼런스" sections: - local: package_reference/overview @@ -52,8 +46,6 @@ title: 로그인 및 로그아웃 - local: package_reference/environment_variables title: 환경 변수 - - local: package_reference/repository - title: 로컬 및 온라인 리포지토리 관리 - local: package_reference/hf_api title: 허깅페이스 Hub API - local: package_reference/file_download diff --git a/docs/source/ko/concepts/git_vs_http.md b/docs/source/ko/concepts/git_vs_http.md deleted file mode 100644 index c9812cb0f8..0000000000 --- a/docs/source/ko/concepts/git_vs_http.md +++ /dev/null @@ -1,50 +0,0 @@ - - -# Git 대 HTTP 패러다임 - -`huggingface_hub` 라이브러리는 git 기반의 저장소(Models, Datasets 또는 Spaces)로 구성된 Hugging Face Hub과 상호 작용하기 위한 라이브러리입니다. -`huggingface_hub`를 사용하여 Hub에 접근하는 방법은 크게 두 가지입니다. - -첫 번째 접근 방식인 소위 "git 기반" 접근 방식은 [`Repository`] 클래스가 주도합니다. -이 방법은 허브와 상호 작용하도록 특별히 설계된 추가 기능이 있는 `git` 명령에 랩퍼를 사용합니다. -두 번째 방법은 "HTTP 기반" 접근 방식이며, [`HfApi`] 클라이언트를 사용하여 HTTP 요청을 수행합니다. -각 방법의 장단점을 살펴보겠습니다. - -## Repository: 역사적인 Git 기반 접근 방식 - -먼저, `huggingface_hub`는 주로 [`Repository`] 클래스를 기반으로 구축되었습니다. -이 클래스는 `"git add"`, `"git commit"`, `"git push"`, `"git tag"`, `"git checkout"` 등과 같은 일반적인 `git` 명령에 대한 Python 랩퍼를 제공합니다. - -이 라이브러리는 머신러닝 저장소에서 자주 사용되는 큰 파일을 추적하고 자격 증명을 설정하는 데 도움이 됩니다. -또한, 이 라이브러리는 백그라운드에서 메소드를 실행할 수 있어, 훈련 중에 데이터를 업로드할 때 유용합니다. - -로컬 머신에 전체 저장소의 로컬 복사본을 유지할 수 있다는 것은 [`Repository`]를 사용하는 가장 큰 장점입니다. -하지만 동시에 로컬 복사본을 지속적으로 업데이트하고 유지해야 한다는 단점이 될 수도 있습니다. -이는 각 개발자가 자체 로컬 복사본을 유지하고 기능을 개발할 때 변경 사항을 push하는 전통적인 소프트웨어 개발과 유사합니다. -그러나 머신러닝의 경우, 사용자가 전체 저장소를 복제할 필요 없이 추론을 위해 가중치만 다운로드하거나 가중치를 한 형식에서 다른 형식으로 변환하기만 하면 되기 때문에 이런 방식이 항상 필요한 것은 아닙니다. - -> [!WARNING] -> [`Repository`]는 지원이 중단될 예정이므로 HTTP 기반 대안을 사용하는 것을 권장합니다. 기존 코드에서 널리 사용되기 때문에 [`Repository`]의 완전한 제거는 릴리스 `v1.0`에서 이루어질 예정입니다. - -## HfApi: 유연하고 편리한 HTTP 클라이언트 - -[`HfApi`] 클래스는 특히 큰 모델이나 데이터셋을 처리할 때 유지하기 어려운 로컬 git 저장소의 대안으로 개발되었습니다. -[`HfApi`] 클래스는 파일 다운로드 및 push, 브랜치 및 태그 생성과 같은 git 기반 접근 방식과 동일한 기능을 제공하지만, 동기화 상태를 유지해야 하는 로컬 폴더가 필요하지 않습니다. - -[`HfApi`] 클래스는 `git`이 제공하는 기능 외에도 추가적인 기능을 제공합니다. -저장소를 관리하고, 효율적인 재사용을 위해 캐싱을 사용하여 파일을 다운로드하고, Hub에서 저장소 및 메타데이터를 검색하고, 토론, PR 및 코멘트와 같은 커뮤니티 기능에 접근하고, Spaces 하드웨어 및 시크릿을 구성할 수 있습니다. - -## 무엇을 사용해야 하나요? 언제 사용하나요? - -전반적으로, **HTTP 기반 접근 방식은 모든 경우에** `huggingface_hub`를 사용하는 것이 좋습니다. -[`HfApi`]를 사용하면 변경 사항을 pull하고 push하고, PR, 태그 및 브랜치로 작업하고, 토론과 상호 작용하는 등의 작업을 할 수 있습니다. -`0.16` 릴리스부터는 [`Repository`] 클래스의 마지막 주요 장점이었던 http 기반 메소드도 백그라운드에서 실행할 수 있습니다. - -그러나 모든 git 명령이 [`HfApi`]를 통해 사용 가능한 것은 아닙니다. 
일부는 구현되지 않을 수도 있지만, 저희는 항상 개선하고 격차를 줄이기 위해 노력하고 있습니다. -사용 사례에 해당되지 않는 경우, [Github에서 이슈](https://github.com/huggingface/huggingface_hub)를 개설해 주세요! -사용자와 함께, 사용자를 위한 🤗 생태계를 구축하는 데 도움이 되는 피드백을 환영합니다. - -git 기반 [`Repository`]보다 http 기반 [`HfApi`]를 선호한다고 해서 Hugging Face Hub에서 git 버전 관리가 바로 사라지는 것은 아닙니다. -워크플로우 상 합당하다면 언제든 로컬에서 `git` 명령을 사용할 수 있습니다. diff --git a/docs/source/ko/guides/cli.md b/docs/source/ko/guides/cli.md index a8096f948b..af88b1ac99 100644 --- a/docs/source/ko/guides/cli.md +++ b/docs/source/ko/guides/cli.md @@ -448,7 +448,6 @@ Copy-and-paste the text below in your GitHub issue. - Who am I ?: Wauplin - Configured git credential helpers: store - FastAI: N/A -- Tensorflow: 2.11.0 - Torch: 1.12.1 - Jinja2: 3.1.2 - Graphviz: 0.20.1 diff --git a/docs/source/ko/guides/inference.md b/docs/source/ko/guides/inference.md index 55a89a7801..7444e2b306 100644 --- a/docs/source/ko/guides/inference.md +++ b/docs/source/ko/guides/inference.md @@ -8,7 +8,6 @@ rendered properly in your Markdown viewer. - [추론 API](https://huggingface.co/docs/api-inference/index): Hugging Face의 인프라에서 가속화된 추론을 실행할 수 있는 서비스로 무료로 제공됩니다. 이 서비스는 추론을 시작하고 다양한 모델을 테스트하며 AI 제품의 프로토타입을 만드는 빠른 방법입니다. - [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index): 모델을 제품 환경에 쉽게 배포할 수 있는 제품입니다. 사용자가 선택한 클라우드 환경에서 완전 관리되는 전용 인프라에서 Hugging Face를 통해 추론이 실행됩니다. -이러한 서비스들은 [`InferenceClient`] 객체를 사용하여 호출할 수 있습니다. 이는 이전의 [`InferenceApi`] 클라이언트를 대체하는 역할을 하며, 작업에 대한 특별한 지원을 추가하고 [추론 API](https://huggingface.co/docs/api-inference/index) 및 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index)에서 추론 작업을 처리합니다. 새 클라이언트로의 마이그레이션에 대한 자세한 내용은 [레거시 InferenceAPI 클라이언트](#legacy-inferenceapi-client) 섹션을 참조하세요. > [!TIP] > [`InferenceClient`]는 API에 HTTP 호출을 수행하는 Python 클라이언트입니다. HTTP 호출을 원하는 툴을 이용하여 직접 사용하려면 (curl, postman 등) [추론 API](https://huggingface.co/docs/api-inference/index) 또는 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index) 문서 페이지를 참조하세요. @@ -77,35 +76,35 @@ text-to-image 작업을 시작해보겠습니다. [`InferenceClient`]의 목표는 Hugging Face 모델에서 추론을 실행하기 위한 가장 쉬운 인터페이스를 제공하는 것입니다. 이는 가장 일반적인 작업들을 지원하는 간단한 API를 가지고 있습니다. 
현재 지원되는 작업 목록은 다음과 같습니다: -| 도메인 | 작업 | 지원 여부 | 문서 | -|--------|--------------------------------|--------------|------------------------------------| -| 오디오 | [오디오 분류](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | -| 오디오 | [오디오 투 오디오](https://huggingface.co/tasks/audio-to-audio) | ✅ | [`~InferenceClient.audio_to_audio`] | -| | [자동 음성 인식](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | -| | [텍스트 투 스피치](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | -| 컴퓨터 비전 | [이미지 분류](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | -| | [이미지 분할](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | -| | [이미지 투 이미지](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | -| | [이미지 투 텍스트](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | -| | [객체 탐지](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | -| | [텍스트 투 이미지](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | -| | [제로샷 이미지 분류](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | -| 멀티모달 | [문서 질의 응답](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | -| | [시각적 질의 응답](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | -| 자연어 처리 | [대화형](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | -| | [특성 추출](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | -| | [마스크 채우기](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | -| | [질의 응답](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | -| | [문장 유사도](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | -| | [요약](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | -| | [테이블 질의 응답](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | -| | [텍스트 분류](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | -| | [텍스트 생성](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | -| | [토큰 분류](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | -| | [번역](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | -| | [제로샷 분류](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | -| 타블로 | [타블로 작업 분류](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | -| | [타블로 회귀](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | +| 도메인 | 작업 | 지원 여부 | 문서 | +| ----------- | --------------------------------------------------------------------------------- | --------- | --------------------------------------------------- | +| 오디오 | [오디오 분류](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | +| 오디오 | [오디오 투 
오디오](https://huggingface.co/tasks/audio-to-audio) | ✅ | [`~InferenceClient.audio_to_audio`] | +| | [자동 음성 인식](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | +| | [텍스트 투 스피치](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | +| 컴퓨터 비전 | [이미지 분류](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | +| | [이미지 분할](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | +| | [이미지 투 이미지](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | +| | [이미지 투 텍스트](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | +| | [객체 탐지](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | +| | [텍스트 투 이미지](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | +| | [제로샷 이미지 분류](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | +| 멀티모달 | [문서 질의 응답](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | +| | [시각적 질의 응답](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | +| 자연어 처리 | [대화형](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | +| | [특성 추출](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | +| | [마스크 채우기](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | +| | [질의 응답](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | +| | [문장 유사도](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | +| | [요약](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | +| | [테이블 질의 응답](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | +| | [텍스트 분류](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | +| | [텍스트 생성](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | +| | [토큰 분류](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | +| | [번역](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | +| | [제로샷 분류](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | +| 타블로 | [타블로 작업 분류](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | +| | [타블로 회귀](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | > [!TIP] > 각 작업에 대해 더 자세히 알고 싶거나 사용 방법 및 각 작업에 대한 가장 인기 있는 모델을 알아보려면 [Tasks](https://huggingface.co/tasks) 페이지를 확인하세요. @@ -175,70 +174,3 @@ pip install --upgrade huggingface_hub[inference] >>> client.image_classification("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg") [{'score': 0.9779096841812134, 'label': 'Blenheim spaniel'}, ...] ``` - -## 레거시 InferenceAPI 클라이언트[[legacy-inferenceapi-client]] - -[`InferenceClient`]는 레거시 [`InferenceApi`] 클라이언트를 대체하여 작동합니다. 
특정 작업에 대한 지원을 제공하고 [추론 API](https://huggingface.co/docs/api-inference/index) 및 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index)에서 추론을 처리합니다. - -아래는 [`InferenceApi`]에서 [`InferenceClient`]로 마이그레이션하는 데 도움이 되는 간단한 가이드입니다. - -### 초기화[[initialization]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN) -``` - -변경 후: - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient(model="bert-base-uncased", token=API_TOKEN) -``` - -### 특정 작업에서 실행하기[[run-on-a-specific-task]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="paraphrase-xlm-r-multilingual-v1", task="feature-extraction") ->>> inference(...) -``` - -변경 후: - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient() ->>> inference.feature_extraction(..., model="paraphrase-xlm-r-multilingual-v1") -``` - -> [!TIP] -> 위의 방법은 코드를 [`InferenceClient`]에 맞게 조정하는 권장 방법입니다. 이렇게 하면 `feature_extraction`과 같이 작업에 특화된 메소드를 활용할 수 있습니다. - -### 사용자 정의 요청 실행[[run-custom-request]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased") ->>> inference(inputs="The goal of life is [MASK].") -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -### 매개변수와 함께 실행하기[[run-with-parameters]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="typeform/distilbert-base-uncased-mnli") ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> inference(inputs, params) -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` diff --git a/docs/source/ko/guides/integrations.md b/docs/source/ko/guides/integrations.md index f0946bc298..b595a3c630 100644 --- a/docs/source/ko/guides/integrations.md +++ b/docs/source/ko/guides/integrations.md @@ -81,7 +81,7 @@ def push_to_hub(model: MyModelClass, repo_name: str) -> None: - `token`: 개인 리포지토리에서 다운로드하기 위한 토큰 - `revision`: 특정 브랜치에서 다운로드하기 위한 리비전 - `cache_dir`: 특정 디렉터리에 파일을 캐시하기 위한 디렉터리 -- `force_download`/`resume_download`/`local_files_only`: 캐시를 재사용할 것인지 여부를 결정하는 매개변수 +- `force_download`/`local_files_only`: 캐시를 재사용할 것인지 여부를 결정하는 매개변수 - `proxies`: HTTP 세션 구성 모델을 푸시할 때는 유사한 매개변수가 지원됩니다: @@ -211,8 +211,7 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, + proxies: Optional[dict], local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # 추가 인자 @@ -232,8 +231,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -393,11 +390,11 @@ class VoiceCraft( 두 가지 접근 방법에 대한 장단점을 간단히 정리해보겠습니다. 아래 표는 단순히 예시일 뿐입니다. 각자 다른 프레임워크에는 고려해야 할 특정 사항이 있을 수 있습니다. 이 가이드는 통합을 다루는 아이디어와 지침을 제공하기 위한 것입니다. 언제든지 궁금한 점이 있으면 문의해 주세요! 
-| 통합 | helpers 사용 시 | [`ModelHubMixin`] 사용 시 | -|:---:|:---:|:---:| -| 사용자 경험 | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| 유연성 | 매우 유연합니다.
구현을 완전히 제어합니다. | 유연성이 떨어집니다.
프레임워크에는 모델 클래스가 있어야 합니다. | -| 유지 관리 | 구성 및 새로운 기능에 대한 지원을 추가하기 위한 유지 관리가 더 필요합니다. 사용자가 보고한 문제를 해결해야할 수도 있습니다. | Hub와의 대부분의 상호 작용이 `huggingface_hub`에서 구현되므로 유지 관리가 줄어듭니다. | -| 문서화 / 타입 주석 | 수동으로 작성해야 합니다. | `huggingface_hub`에서 부분적으로 처리됩니다. | -| 다운로드 횟수 표시기 | 수동으로 처리해야 합니다. | 클래스에 `config` 속성이 있다면 기본적으로 활성화됩니다. | -| 모델 카드 | 수동으로 처리해야 합니다. | library_name, tags 등을 활용하여 기본적으로 생성됩니다. | +| 통합 | helpers 사용 시 | [`ModelHubMixin`] 사용 시 | +| :------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------: | +| 사용자 경험 | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| 유연성 | 매우 유연합니다.
구현을 완전히 제어합니다. | 유연성이 떨어집니다.
프레임워크에는 모델 클래스가 있어야 합니다. | +| 유지 관리 | 구성 및 새로운 기능에 대한 지원을 추가하기 위한 유지 관리가 더 필요합니다. 사용자가 보고한 문제를 해결해야할 수도 있습니다. | Hub와의 대부분의 상호 작용이 `huggingface_hub`에서 구현되므로 유지 관리가 줄어듭니다. | +| 문서화 / 타입 주석 | 수동으로 작성해야 합니다. | `huggingface_hub`에서 부분적으로 처리됩니다. | +| 다운로드 횟수 표시기 | 수동으로 처리해야 합니다. | 클래스에 `config` 속성이 있다면 기본적으로 활성화됩니다. | +| 모델 카드 | 수동으로 처리해야 합니다. | library_name, tags 등을 활용하여 기본적으로 생성됩니다. | diff --git a/docs/source/ko/guides/repository.md b/docs/source/ko/guides/repository.md deleted file mode 100644 index 343dec799a..0000000000 --- a/docs/source/ko/guides/repository.md +++ /dev/null @@ -1,217 +0,0 @@ - - -# 리포지토리 생성과 관리[[create-and-manage-a-repository]] - -Hugging Face Hub는 Git 리포지토리 모음입니다. [Git](https://git-scm.com/)은 협업을 할 때 여러 프로젝트 버전을 쉽게 관리하기 위해 널리 사용되는 소프트웨어 개발 도구입니다. 이 가이드에서는 Hub의 리포지토리 사용법인 다음 내용을 다룹니다: - -- 리포지토리 생성과 삭제. -- 태그 및 브랜치 관리. -- 리포지토리 이름 변경. -- 리포지토리 공개 여부. -- 리포지토리 복사본 관리. - -> [!WARNING] -> GitLab/GitHub/Bitbucket과 같은 플랫폼을 사용해 본 경험이 있다면, 모델 리포지토리를 관리하기 위해 `git` CLI를 사용해 git 리포지토리를 클론(`git clone`)하고 변경 사항을 커밋(`git add, git commit`)하고 커밋한 내용을 푸시(`git push`) 하는것이 가장 먼저 떠오를 것입니다. 이 명령어들은 Hugging Face Hub에서도 사용할 수 있습니다. 하지만 소프트웨어 엔지니어링과 머신러닝은 동일한 요구 사항과 워크플로우를 공유하지 않습니다. 모델 리포지토리는 다양한 프레임워크와 도구를 위한 대규모 모델 가중치 파일을 유지관리 할 수 있으므로, 리포지토리를 복제하면 대규모 로컬 폴더를 유지관리하고 막대한 크기의 파일을 다루게 될 수 있습니다. 결과적으로 Hugging Face의 커스텀 HTTP 방법을 사용하는 것이 더욱 효율적일 수 있습니다. 더 자세한 내용은 [Git vs HTTP paradigm](../concepts/git_vs_http) 문서를 참조하세요. - -Hub에 리포지토리를 생성하고 관리하려면, 로그인이 되어 있어야 합니다. 로그인이 안 되어있다면 [이 문서](../quick-start#authentication)를 참고해 주세요. 이 가이드에서는 로그인이 되어있다는 가정하에 진행됩니다. - -## 리포지토리 생성 및 삭제[[repo-creation-and-deletion]] - -첫 번째 단계는 어떻게 리포지토리를 생성하고 삭제하는지를 알아야 합니다. 사용자 이름 네임스페이스 아래에 소유한 리포지토리 또는 쓰기 권한이 있는 조직의 리포지토리만 관리할 수 있습니다. - -### 리포지토리 생성[[create-a-repository]] - -[`create_repo`] 함수로 함께 빈 리포지토리를 만들고 `repo_id` 매개변수를 사용하여 이름을 정하세요. `repo_id`는 사용자 이름 또는 조직 이름 뒤에 리포지토리 이름이 따라옵니다: `username_or_org/repo_name`. - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-model") -'https://huggingface.co/lysandre/test-model' -``` - -기본적으로 [`create_repo`]는 모델 리포지토리를 만듭니다. 하지만 `repo_type` 매개변수를 사용하여 다른 유형의 리포지토리를 지정할 수 있습니다. 예를 들어 데이터셋 리포지토리를 만들고 싶다면: - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-dataset", repo_type="dataset") -'https://huggingface.co/datasets/lysandre/test-dataset' -``` - -리포지토리를 만들 때, `private` 매개변수를 사용하여 가시성을 설정할 수 있습니다. - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-private", private=True) -``` - -추후 리포지토리 가시성을 변경하고 싶다면, [`update_repo_settings`] 함수를 이용해 바꿀 수 있습니다. - -### 리포지토리 삭제[[delete-a-repository]] - -[`delete_repo`]를 사용하여 리포지토리를 삭제할 수 있습니다. 리포지토리를 삭제하기 전에 신중히 결정하세요. 왜냐하면, 삭제하고 나서 다시 되돌릴 수 없는 프로세스이기 때문입니다! - -삭제하려는 리포지토리의 `repo_id`를 지정하세요: - -```py ->>> delete_repo(repo_id="lysandre/my-corrupted-dataset", repo_type="dataset") -``` - -### 리포지토리 복제(Spaces 전용)[[duplicate-a-repository-only-for-spaces]] - -가끔 다른 누군가의 리포지토리를 복사하여, 상황에 맞게 수정하고 싶을 때가 있습니다. 이는 [`duplicate_space`]를 사용하여 Space에 복사할 수 있습니다. 이 함수를 사용하면 리포지토리 전체를 복제할 수 있습니다. 그러나 여전히 하드웨어, 절전 시간, 리포지토리, 변수 및 비밀번호와 같은 자체 설정을 구성해야 합니다. 자세한 내용은 [Manage your Space](./manage-spaces) 문서를 참조하십시오. - -```py ->>> from huggingface_hub import duplicate_space ->>> duplicate_space("multimodalart/dreambooth-training", private=False) -RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...) -``` - -## 파일 다운로드와 업로드[[upload-and-download-files]] - -이제 리포지토리를 생성했으므로, 변경 사항을 푸시하고 파일을 다운로드하는 것에 관심이 있을 것입니다. - -이 두 가지 주제는 각각 자체 가이드가 필요합니다. 
리포지토리 사용하는 방법에 대해 알아보려면 [업로드](./upload) 및 [다운로드](./download) 문서를 참조하세요. - -## 브랜치와 태그[[branches-and-tags]] - -Git 리포지토리는 동일한 리포지토리의 다른 버전을 저장하기 위해 브랜치들을 사용합니다. 태그는 버전을 출시할 때와 같이 리포지토리의 특정 상태를 표시하는 데 사용될 수도 있습니다. 일반적으로 브랜치와 태그는 [git 참조](https://git-scm.com/book/en/v2/Git-Internals-Git-References) -로 참조됩니다. - -### 브랜치 생성과 태그[[create-branches-and-tags]] - -[`create_branch`]와 [`create_tag`]를 이용하여 새로운 브랜치와 태그를 생성할 수 있습니다. - -```py ->>> from huggingface_hub import create_branch, create_tag - -# `main` 브랜치를 기반으로 Space 저장소에 새 브랜치를 생성합니다. ->>> create_branch("Matthijs/speecht5-tts-demo", repo_type="space", branch="handle-dog-speaker") - -# `v0.1-release` 브랜치를 기반으로 Dataset 저장소에 태그를 생성합니다. ->>> create_tag("bigcode/the-stack", repo_type="dataset", revision="v0.1-release", tag="v0.1.1", tag_message="Bump release version.") -``` - -같은 방식으로 [`delete_branch`]와 [`delete_tag`] 함수를 사용하여 브랜치 또는 태그를 삭제할 수 있습니다. - -### 모든 브랜치와 태그 나열[[list-all-branches-and-tags]] - -[`list_repo_refs`]를 사용하여 리포지토리로부터 현재 존재하는 git 참조를 나열할 수 있습니다: - -```py ->>> from huggingface_hub import list_repo_refs ->>> list_repo_refs("bigcode/the-stack", repo_type="dataset") -GitRefs( - branches=[ - GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'), - GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714') - ], - converts=[], - tags=[ - GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da') - ] -) -``` - -## 리포지토리 설정 변경[[change-repository-settings]] - -리포지토리는 구성할 수 있는 몇 가지 설정이 있습니다. 대부분의 경우 브라우저의 리포지토리 설정 페이지에서 직접 설정할 것입니다. 설정을 바꾸려면 리포지토리에 대한 쓰기 액세스 권한이 있어야 합니다(사용자 리포지토리거나, 조직의 구성원이어야 함). 이 주제에서는 `huggingface_hub`를 사용하여 프로그래밍 방식으로 구성할 수 있는 설정을 알아보겠습니다. - -Spaces를 위한 특정 설정들(하드웨어, 환경변수 등)을 구성하기 위해서는 [Manage your Spaces](../guides/manage-spaces) 문서를 참조하세요. - -### 가시성 업데이트[[update-visibility]] - -리포지토리는 공개 또는 비공개로 설정할 수 있습니다. 비공개 리포지토리는 해당 저장소의 사용자 혹은 소속된 조직의 구성원만 볼 수 있습니다. 다음과 같이 리포지토리를 비공개로 변경할 수 있습니다. - -```py ->>> from huggingface_hub import update_repo_settings ->>> update_repo_settings(repo_id=repo_id, private=True) -``` - -### 리포지토리 이름 변경[[rename-your-repository]] - -[`move_repo`]를 사용하여 Hub에 있는 리포지토리 이름을 변경할 수 있습니다. 이 함수를 사용하여 개인에서 조직 리포지토리로 이동할 수도 있습니다. 이렇게 하면 [일부 제한 사항](https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo)이 있으므로 주의해야 합니다. 예를 들어, 다른 사용자에게 리포지토리를 이전할 수는 없습니다. - -```py ->>> from huggingface_hub import move_repo ->>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") -``` - -## 리포지토리의 로컬 복사본 관리[[manage-a-local-copy-of-your-repository]] - -위에 설명한 모든 작업은 HTTP 요청을 사용하여 작업할 수 있습니다. 그러나 경우에 따라 로컬 복사본을 가지고 익숙한 Git 명령어를 사용하여 상호 작용하는 것이 편리할 수 있습니다. - -[`Repository`] 클래스는 Git 명령어와 유사한 기능을 제공하는 함수를 사용하여 Hub의 파일 및 리포지토리와 상호 작용할 수 있습니다. 이는 이미 알고 있고 좋아하는 Git 및 Git-LFS 방법을 사용하는 래퍼(wrapper)입니다. 시작하기 전에 Git-LFS가 설치되어 있는지 확인하세요([여기서](https://git-lfs.github.com/) 설치 지침을 확인할 수 있습니다). - -> [!WARNING] -> [`Repository`]는 [`HfApi`]에 구현된 HTTP 기반 대안을 선호하여 중단되었습니다. 아직 많은 레거시 코드에서 사용되고 있기 때문에 [`Repository`]가 완전히 제거되는 건 `v1.0` 릴리스에서만 이루어집니다. 자세한 내용은 [해당 설명 페이지](./concepts/git_vs_http)를 참조하세요. - -### 로컬 리포지토리 사용[[use-a-local-repository]] - -로컬 리포지토리 경로를 사용하여 [`Repository`] 객체를 생성하세요: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### 복제[[clone]] - -`clone_from` 매개변수는 Hugging Face 리포지토리 ID에서 로컬 디렉터리로 리포지토리를 복제합니다. 
이때 `local_dir` 매개변수를 사용하여 로컬 디렉터리에 저장합니다: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from`은 URL을 사용해 리포지토리를 복제할 수 있습니다. - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from` 매개변수를 [`create_repo`]와 결합하여 리포지토리를 만들고 복제할 수 있습니다. - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -리포지토리를 복제할 때 `git_user` 및 `git_email` 매개변수를 지정함으로써 복제한 리포지토리에 Git 사용자 이름과 이메일을 설정할 수 있습니다. 사용자가 해당 리포지토리에 커밋하면 Git은 커밋 작성자를 인식합니다. - -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -### 브랜치[[branch]] - -브랜치는 현재 코드와 파일에 영향을 미치지 않으면서 협업과 실험에 중요합니다.[`~Repository.git_checkout`]을 사용하여 브랜치 간에 전환할 수 있습니다. 예를 들어, `branch1`에서 `branch2`로 전환하려면: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### 끌어오기[[pull]] - -[`~Repository.git_pull`]은 원격 리포지토리로부터의 변경사항을 현재 로컬 브랜치에 업데이트하게 합니다. - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -브랜치가 원격에서의 새 커밋으로 업데이트 된 후에 로컬 커밋을 수행하고자 한다면 `rebase=True`를 설정하세요: - -```py ->>> repo.git_pull(rebase=True) -``` diff --git a/docs/source/ko/guides/upload.md b/docs/source/ko/guides/upload.md index a55d14c646..13de9b04f8 100644 --- a/docs/source/ko/guides/upload.md +++ b/docs/source/ko/guides/upload.md @@ -4,12 +4,7 @@ rendered properly in your Markdown viewer. # Hub에 파일 업로드하기[[upload-files-to-the-hub]] -파일과 작업물을 공유하는 것은 Hub의 주요 특성 중 하나입니다. `huggingface_hub`는 Hub에 파일을 업로드하기 위한 몇 가지 옵션을 제공합니다. 이러한 기능을 단독으로 사용하거나 라이브러리에 통합하여 해당 라이브러리의 사용자가 Hub와 더 편리하게 상호작용할 수 있도록 도울 수 있습니다. 이 가이드에서는 파일을 푸시하는 다양한 방법에 대해 설명합니다: - -- Git을 사용하지 않고 푸시하기. -- [Git LFS](https://git-lfs.github.com/)를 사용하여 매우 큰 파일을 푸시하기. -- `commit` 컨텍스트 매니저를 사용하여 푸시하기. -- [`~Repository.push_to_hub`] 함수를 사용하여 푸시하기. +파일과 작업물을 공유하는 것은 Hub의 주요 특성 중 하나입니다. `huggingface_hub`는 Hub에 파일을 업로드하기 위한 몇 가지 옵션을 제공합니다. 이러한 기능을 단독으로 사용하거나 라이브러리에 통합하여 해당 라이브러리의 사용자가 Hub와 더 편리하게 상호작용할 수 있도록 도울 수 있습니다. Hub에 파일을 업로드 하려면 허깅페이스 계정으로 로그인해야 합니다. 인증에 대한 자세한 내용은 [이 페이지](../quick-start#authentication)를 참조해 주세요. @@ -420,115 +415,3 @@ Hub에서 리포지토리를 구성하는 방법에 대한 모범 사례는 [리 > `hf_transfer`는 고급 사용자 도구입니다! > 테스트 및 프로덕션 준비가 완료되었지만, 고급 오류 처리나 프록시와 같은 사용자 친화적인 기능이 부족합니다. > 자세한 내용은 [이 섹션](https://huggingface.co/docs/huggingface_hub/hf_transfer)을 참조하세요. - -## (레거시) Git LFS로 파일 업로드하기[[legacy-upload-files-with-git-lfs]] - -위에서 설명한 모든 방법은 Hub의 API를 사용하여 파일을 업로드하며, 이는 Hub에 파일을 업로드하는 데 권장되는 방법입니다. -이뿐만 아니라 로컬 리포지토리를 관리하기 위하여 git 도구의 래퍼인 [`Repository`]또한 제공합니다. - -> [!WARNING] -> [`Repository`]는 공식적으로 지원 종료된 것은 아니지만, 가급적이면 위에서 설명한 HTTP 기반 방법들을 사용할 것을 권장합니다. -> 이 권장 사항에 대한 자세한 내용은 HTTP 기반 방식과 Git 기반 방식 간의 핵심적인 차이점을 설명하는 [이 가이드](../concepts/git_vs_http)를 참조하세요. - -Git LFS는 10MB보다 큰 파일을 자동으로 처리합니다. 하지만 매우 큰 파일(5GB 이상)의 경우, Git LFS용 사용자 지정 전송 에이전트를 설치해야 합니다: - -```bash -hf lfs-enable-largefiles -``` - -매우 큰 파일이 있는 각 리포지토리에 대해 이 옵션을 설치해야 합니다. -설치가 완료되면 5GB보다 큰 파일을 푸시할 수 있습니다. - -### 커밋 컨텍스트 관리자[[commit-context-manager]] - -`commit` 컨텍스트 관리자는 가장 일반적인 네 가지 Git 명령인 pull, add, commit, push를 처리합니다. -`git-lfs`는 10MB보다 큰 파일을 자동으로 추적합니다. -다음 예제에서는 `commit` 컨텍스트 관리자가 다음과 같은 작업을 수행합니다: - -1. `text-files` 리포지토리에서 pull. -2. 
`file.txt`에 변경 내용을 add. -3. 변경 내용을 commit. -4. 변경 내용을 `text-files` 리포지토리에 push. - -```python ->>> from huggingface_hub import Repository ->>> with Repository(local_dir="text-files", clone_from="/text-files").commit(commit_message="My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -다음은 `commit` 컨텍스트 관리자를 사용하여 파일을 저장하고 리포지토리에 업로드하는 방법의 또 다른 예입니다: - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model", token=True).commit(commit_message="My cool model :)"): -... torch.save(model.state_dict(), "model.pt") -``` - -커밋을 비동기적으로 푸시하려면 `blocking=False`를 설정하세요. -커밋을 푸시하는 동안 스크립트를 계속 실행하고 싶을 때 논 블로킹 동작이 유용합니다. - -```python ->>> with repo.commit(commit_message="My cool model :)", blocking=False) -``` - -`command_queue` 메서드로 푸시 상태를 확인할 수 있습니다: - -```python ->>> last_command = repo.command_queue[-1] ->>> last_command.status -``` - -가능한 상태는 아래 표를 참조하세요: - -| 상태 | 설명 | -| -------- | ----------------------------- | -| -1 | 푸시가 진행 중입니다. | -| 0 | 푸시가 성공적으로 완료되었습니다.| -| Non-zero | 오류가 발생했습니다. | - -`blocking=False`인 경우, 명령이 추적되며 스크립트에서 다른 오류가 발생하더라도 모든 푸시가 완료된 경우에만 스크립트가 종료됩니다. -푸시 상태를 확인하는 데 유용한 몇 가지 추가 명령은 다음과 같습니다: - -```python -# 오류를 검사합니다. ->>> last_command.stderr - -# 푸시 진행여부를 확인합니다. ->>> last_command.is_done - -# 푸시 명령의 에러여부를 파악합니다. ->>> last_command.failed -``` - -### push_to_hub[[pushtohub]] - -[`Repository`] 클래스에는 파일을 추가하고 커밋한 후 리포지토리로 푸시하는 [`~Repository.push_to_hub`] 함수가 있습니다. [`~Repository.push_to_hub`]는 `commit` 컨텍스트 관리자와는 달리 호출하기 전에 먼저 리포지토리에서 업데이트(pull) 작업을 수행 해야 합니다. - -예를 들어 Hub에서 리포지토리를 이미 복제했다면 로컬 디렉터리에서 `repo`를 초기화할 수 있습니다: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="path/to/local/repo") -``` - -로컬 클론을 [`~Repository.git_pull`]로 업데이트한 다음 파일을 Hub로 푸시합니다: - -```py ->>> repo.git_pull() ->>> repo.push_to_hub(commit_message="Commit my-awesome-file to the Hub") -``` - -그러나 아직 파일을 푸시할 준비가 되지 않았다면 [`~Repository.git_add`] 와 [`~Repository.git_commit`]을 사용하여 파일만 추가하고 커밋할 수 있습니다: - -```py ->>> repo.git_add("path/to/file") ->>> repo.git_commit(commit_message="add my first model config file :)") -``` - -준비가 완료되면 [`~Repository.git_push`]를 사용하여 파일을 리포지토리에 푸시합니다: - -```py ->>> repo.git_push() -``` diff --git a/docs/source/ko/installation.md b/docs/source/ko/installation.md index 720346b1a1..d9cd8a46dd 100644 --- a/docs/source/ko/installation.md +++ b/docs/source/ko/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 시작하기 전에 적절한 패키지를 설치하여 환경을 설정해야 합니다. -`huggingface_hub`는 **Python 3.8+**에서 테스트되었습니다. +`huggingface_hub`는 **Python 3.9+**에서 테스트되었습니다. ## pip로 설치하기 [[install-with-pip]] @@ -46,17 +46,13 @@ pip install --upgrade huggingface_hub 선택적 의존성은 `pip`을 통해 설치할 수 있습니다: ```bash -# TensorFlow 관련 기능에 대한 의존성을 설치합니다. -# /!\ 경고: `pip install tensorflow`와 동일하지 않습니다. -pip install 'huggingface_hub[tensorflow]' - # PyTorch와 CLI와 관련된 기능에 대한 의존성을 모두 설치합니다. pip install 'huggingface_hub[cli,torch]' ``` 다음은 `huggingface_hub`의 선택 의존성 목록입니다: - `cli`: 보다 편리한 `huggingface_hub`의 CLI 인터페이스입니다. -- `fastai`, `torch`, `tensorflow`: 프레임워크별 기능을 실행하려면 필요합니다. +- `fastai`, `torch`: 프레임워크별 기능을 실행하려면 필요합니다. - `dev`: 라이브러리에 기여하고 싶다면 필요합니다. 테스트 실행을 위한 `testing`, 타입 검사기 실행을 위한 `typing`, 린터 실행을 위한 `quality`가 포함됩니다. 
### 소스에서 설치 [[install-from-source]] diff --git a/docs/source/ko/package_reference/inference_client.md b/docs/source/ko/package_reference/inference_client.md index 686c9282a9..0930a75351 100644 --- a/docs/source/ko/package_reference/inference_client.md +++ b/docs/source/ko/package_reference/inference_client.md @@ -35,13 +35,3 @@ pip install --upgrade huggingface_hub[inference] ## 반환 유형[[return-types]] 대부분의 작업에 대해, 반환 값은 내장된 유형(string, list, image...)을 갖습니다. 보다 복잡한 유형을 위한 목록은 다음과 같습니다. - - -## 추론 API[[huggingface_hub.InferenceApi]] - -[`InferenceAPI`]는 추론 API를 호출하는 레거시 방식입니다. 이 인터페이스는 더 간단하며 각 작업의 입력 매개변수와 출력 형식을 알아야 합니다. 또한 추론 엔드포인트나 AWS SageMaker와 같은 다른 서비스에 연결할 수 있는 기능이 없습니다. [`InferenceAPI`]는 곧 폐지될 예정이므로 가능한 경우 [`InferenceClient`]를 사용하는 것을 권장합니다. 스크립트에서 [`InferenceAPI`]를 [`InferenceClient`]로 전환하는 방법에 대해 알아보려면 [이 가이드](../guides/inference#legacy-inferenceapi-client)를 참조하세요. - -[[autodoc]] InferenceApi - - __init__ - - __call__ - - all diff --git a/docs/source/ko/package_reference/mixins.md b/docs/source/ko/package_reference/mixins.md index 4a4a84ad9e..a5f8162eff 100644 --- a/docs/source/ko/package_reference/mixins.md +++ b/docs/source/ko/package_reference/mixins.md @@ -20,16 +20,6 @@ ML 프레임워크를 Hub와 통합하는 방법은 [통합 가이드](../guides [[autodoc]] PyTorchModelHubMixin -### Keras[[huggingface_hub.KerasModelHubMixin]] - -[[autodoc]] KerasModelHubMixin - -[[autodoc]] from_pretrained_keras - -[[autodoc]] push_to_hub_keras - -[[autodoc]] save_pretrained_keras - ### Fastai[[huggingface_hub.from_pretrained_fastai]] [[autodoc]] from_pretrained_fastai diff --git a/docs/source/ko/package_reference/repository.md b/docs/source/ko/package_reference/repository.md deleted file mode 100644 index fc70e3e203..0000000000 --- a/docs/source/ko/package_reference/repository.md +++ /dev/null @@ -1,49 +0,0 @@ - - -# 로컬 및 온라인 리포지토리 관리[[managing-local-and-online-repositories]] - -`Repository` 클래스는 `git` 및 `git-lfs` 명령을 감싸는 도우미 클래스로, 매우 큰 리포지토리를 관리하는 데 적합한 툴링을 제공합니다. - -`git` 작업이 포함되거나 리포지토리에서의 협업이 중점이 될 때 권장되는 도구입니다. - -## 리포지토리 클래스[[the-repository-class]] - -[[autodoc]] Repository - - __init__ - - current_branch - - all - -## 도우미 메소드[[helper-methods]] - -[[autodoc]] huggingface_hub.repository.is_git_repo - -[[autodoc]] huggingface_hub.repository.is_local_clone - -[[autodoc]] huggingface_hub.repository.is_tracked_with_lfs - -[[autodoc]] huggingface_hub.repository.is_git_ignored - -[[autodoc]] huggingface_hub.repository.files_to_be_staged - -[[autodoc]] huggingface_hub.repository.is_tracked_upstream - -[[autodoc]] huggingface_hub.repository.commits_to_push - -## 후속 비동기 명령[[following-asynchronous-commands]] - -`Repository` 유틸리티는 비동기적으로 시작할 수 있는 여러 메소드를 제공합니다. -- `git_push` -- `git_pull` -- `push_to_hub` -- `commit` 컨텍스트 관리자 - -이러한 비동기 메소드를 관리하는 유틸리티는 아래를 참조하세요. - -[[autodoc]] Repository - - commands_failed - - commands_in_progress - - wait_for_commands - -[[autodoc]] huggingface_hub.repository.CommandInProgress diff --git a/docs/source/ko/package_reference/serialization.md b/docs/source/ko/package_reference/serialization.md index 25901237bf..9dd7a6ce7b 100644 --- a/docs/source/ko/package_reference/serialization.md +++ b/docs/source/ko/package_reference/serialization.md @@ -8,11 +8,7 @@ rendered properly in your Markdown viewer. ## 상태 사전을 샤드로 나누기[[split-state-dict-into-shards]] -현재 이 모듈은 상태 딕셔너리(예: 레이어 이름과 관련 텐서 간의 매핑)를 받아 여러 샤드로 나누고, 이 과정에서 적절한 인덱스를 생성하는 단일 헬퍼를 포함하고 있습니다. 이 헬퍼는 `torch`, `tensorflow`, `numpy` 텐서에 사용 가능하며, 다른 ML 프레임워크로 쉽게 확장될 수 있도록 설계되었습니다. 
- -### split_tf_state_dict_into_shards[[huggingface_hub.split_tf_state_dict_into_shards]] - -[[autodoc]] huggingface_hub.split_tf_state_dict_into_shards +현재 이 모듈은 상태 딕셔너리(예: 레이어 이름과 관련 텐서 간의 매핑)를 받아 여러 샤드로 나누고, 이 과정에서 적절한 인덱스를 생성하는 단일 헬퍼를 포함하고 있습니다. 이 헬퍼는 `torch` 텐서에 사용 가능하며, 다른 ML 프레임워크로 쉽게 확장될 수 있도록 설계되었습니다. ### split_torch_state_dict_into_shards[[huggingface_hub.split_torch_state_dict_into_shards]] diff --git a/docs/source/ko/package_reference/utilities.md b/docs/source/ko/package_reference/utilities.md index a76e9d474b..4390a90718 100644 --- a/docs/source/ko/package_reference/utilities.md +++ b/docs/source/ko/package_reference/utilities.md @@ -84,16 +84,6 @@ True [[autodoc]] huggingface_hub.utils.enable_progress_bars -## HTTP 백엔드 구성[[huggingface_hub.configure_http_backend]] - -일부 환경에서는 HTTP 호출이 이루어지는 방식을 구성할 수 있습니다. 예를 들어, 프록시를 사용하는 경우가 그렇습니다. `huggingface_hub`는 [`configure_http_backend`]를 사용하여 전역적으로 이를 구성할 수 있게 합니다. 그러면 Hub로의 모든 요청이 사용자가 설정한 설정을 사용합니다. 내부적으로 `huggingface_hub`는 `requests.Session`을 사용하므로 사용 가능한 매개변수에 대해 자세히 알아보려면 [requests 문서](https://requests.readthedocs.io/en/latest/user/advanced)를 참조하는 것이 좋습니다. - -`requests.Session`이 스레드 안전을 보장하지 않기 때문에 `huggingface_hub`는 스레드당 하나의 세션 인스턴스를 생성합니다. 세션을 사용하면 HTTP 호출 사이에 연결을 유지하고 최종적으로 시간을 절약할 수 있습니다. `huggingface_hub`를 서드 파티 라이브러리에 통합하고 사용자 지정 호출을 Hub로 만들려는 경우, [`get_session`]을 사용하여 사용자가 구성한 세션을 가져옵니다 (즉, 모든 `requests.get(...)` 호출을 `get_session().get(...)`으로 대체합니다). - -[[autodoc]] configure_http_backend - -[[autodoc]] get_session - ## HTTP 오류 다루기[[handle-http-errors]] @@ -125,39 +115,43 @@ except HfHubHTTPError as e: 여기에는 `huggingface_hub`에서 발생하는 HTTP 오류 목록이 있습니다. -#### HfHubHTTPError[[huggingface_hub.utils.HfHubHTTPError]] +#### HfHubHTTPError[[huggingface_hub.errors.HfHubHTTPError]] `HfHubHTTPError`는 HF Hub HTTP 오류에 대한 부모 클래스입니다. 이 클래스는 서버 응답을 구문 분석하고 오류 메시지를 형식화하여 사용자에게 가능한 많은 정보를 제공합니다. 
-[[autodoc]] huggingface_hub.utils.HfHubHTTPError +[[autodoc]] huggingface_hub.errors.HfHubHTTPError + +#### RepositoryNotFoundError[[huggingface_hub.errors.RepositoryNotFoundError]] -#### RepositoryNotFoundError[[huggingface_hub.utils.RepositoryNotFoundError]] +[[autodoc]] huggingface_hub.errors.RepositoryNotFoundError -[[autodoc]] huggingface_hub.utils.RepositoryNotFoundError +#### GatedRepoError[[huggingface_hub.errors.GatedRepoError]] -#### GatedRepoError[[huggingface_hub.utils.GatedRepoError]] +[[autodoc]] huggingface_hub.errors.GatedRepoError -[[autodoc]] huggingface_hub.utils.GatedRepoError +#### RevisionNotFoundError[[huggingface_hub.errors.RevisionNotFoundError]] -#### RevisionNotFoundError[[huggingface_hub.utils.RevisionNotFoundError]] +[[autodoc]] huggingface_hub.errors.RevisionNotFoundError -[[autodoc]] huggingface_hub.utils.RevisionNotFoundError +#### BadRequestError[[huggingface_hub.errors.BadRequestError]] -#### EntryNotFoundError[[huggingface_hub.utils.EntryNotFoundError]] +[[autodoc]] huggingface_hub.errors.BadRequestError -[[autodoc]] huggingface_hub.utils.EntryNotFoundError +#### EntryNotFoundError[[huggingface_hub.errors.EntryNotFoundError]] -#### BadRequestError[[huggingface_hub.utils.BadRequestError]] +[[autodoc]] huggingface_hub.errors.EntryNotFoundError -[[autodoc]] huggingface_hub.utils.BadRequestError +#### RemoteEntryNotFoundError[[huggingface_hub.errors.RemoteEntryNotFoundError]] -#### LocalEntryNotFoundError[[huggingface_hub.utils.LocalEntryNotFoundError]] +[[autodoc]] huggingface_hub.errors.RemoteEntryNotFoundError -[[autodoc]] huggingface_hub.utils.LocalEntryNotFoundError +#### LocalEntryNotFoundError[[huggingface_hub.errors.LocalEntryNotFoundError]] -#### OfflineModeIsEnabledd[[huggingface_hub.utils.OfflineModeIsEnabled]] +[[autodoc]] huggingface_hub.errors.LocalEntryNotFoundError -[[autodoc]] huggingface_hub.utils.OfflineModeIsEnabled +#### OfflineModeIsEnabled[[huggingface_hub.errors.OfflineModeIsEnabled]] + +[[autodoc]] huggingface_hub.errors.OfflineModeIsEnabled ## 원격 측정[[huggingface_hub.utils.send_telemetry]] @@ -195,20 +189,6 @@ huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in rep >>> my_cool_method(repo_id="other..repo..id") huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'. - ->>> @validate_hf_hub_args -... def my_cool_auth_method(token: str): -... print(token) - ->>> my_cool_auth_method(token="a token") -"a token" - ->>> my_cool_auth_method(use_auth_token="a use_auth_token") -"a use_auth_token" - ->>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token") -UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token` value will be ignored. -"a token" ``` #### validate_hf_hub_args[[huggingface_hub.utils.validate_hf_hub_args]] @@ -226,9 +206,3 @@ UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token #### repo_id[[huggingface_hub.utils.validate_repo_id]] [[autodoc]] utils.validate_repo_id - -#### smoothly_deprecate_use_auth_token[[huggingface_hub.utils.smoothly_deprecate_use_auth_token]] - -정확히 검증기는 아니지만, 잘 실행됩니다. - -[[autodoc]] utils.smoothly_deprecate_use_auth_token diff --git a/docs/source/tm/installation.md b/docs/source/tm/installation.md index f16ac74667..479b2c3e4c 100644 --- a/docs/source/tm/installation.md +++ b/docs/source/tm/installation.md @@ -2,7 +2,7 @@ நீங்கள் தொடங்குவதற்கு முன், தகுந்த தொகுப்புகளை நிறுவுவதன் மூலம் உங்கள் சூழலை அமைக்க வேண்டும்.
-`huggingface_hub` **Python 3.8+** மின்பொருள்களில் சோதிக்கப்பட்டுள்ளது. +`huggingface_hub` **Python 3.9+** மின்பொருள்களில் சோதிக்கப்பட்டுள்ளது. ### பிப் மூலம் நிறுவு @@ -43,17 +43,13 @@ pip install --upgrade huggingface_hub நீங்கள் விருப்பத் தேவைப்படும் சார்புகளை `pip` மூலம் நிறுவலாம்: ```bash -# டென்சர்‌ஃபிளோவுக்கான குறிப்பிட்ட அம்சங்களுக்கு சார்ந்த பொறுப்பு நிறுவவும் -# /!\ எச்சரிக்கை: இது `pip install tensorflow` க்கு சமமாகக் கருதப்படாது -pip install 'huggingface_hub[tensorflow]' - # டார்ச்-குறிப்பிட்ட மற்றும் CLI-குறிப்பிட்ட அம்சங்களுக்கு தேவையான பொறுப்புகளை நிறுவவும். pip install 'huggingface_hub[cli,torch]' ``` `huggingface_hub`-இல் உள்ள விருப்பத் தேவைப்படும் சார்புகளின் பட்டியல்: - `cli`: `huggingface_hub`-க்கு மிகவும் வசதியான CLI இடைமுகத்தை வழங்குகிறது. -- `fastai`, `torch`, `tensorflow`: வடிவமைப்பு குறிப்பிட்ட அம்சங்களை இயக்க தேவையான சார்புகள். +- `fastai`, `torch`: வடிவமைப்பு குறிப்பிட்ட அம்சங்களை இயக்க தேவையான சார்புகள். - `dev`: நூலகத்திற்கு பங்களிக்க தேவையான சார்புகள். இதில் சோதனை (சோதனைகளை இயக்க), வகை சோதனை (வகை சரிபார்ப்பு ஐ இயக்க) மற்றும் தரம் (லிண்டர்கள் ஐ இயக்க) உள்ளன. ### மூலத்திலிருந்து நிறுவல் diff --git a/setup.py b/setup.py index 028c67be08..9862deb896 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import sys + from setuptools import find_packages, setup @@ -17,8 +19,9 @@ def get_version() -> str: "hf-xet>=1.1.3,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'", "packaging>=20.9", "pyyaml>=5.1", - "requests", + "httpx>=0.23.0, <1", "tqdm>=4.42.1", + "typer-slim", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias ] @@ -26,6 +29,7 @@ def get_version() -> str: extras["cli"] = [ "InquirerPy==0.3.4", # Note: installs `prompt-toolkit` in the background + "shellingham", ] extras["inference"] = [ @@ -52,17 +56,6 @@ def get_version() -> str: "fastcore>=1.3.27", ] -extras["tensorflow"] = [ - "tensorflow", - "pydot", - "graphviz", -] - -extras["tensorflow-testing"] = [ - "tensorflow", - "keras<3.0", -] - extras["hf_xet"] = ["hf-xet>=1.1.2,<2.0.0"] extras["mcp"] = [ @@ -77,7 +70,7 @@ def get_version() -> str: + [ "jedi", "Jinja2", - "pytest>=8.1.1,<8.2.2", # at least until 8.2.3 is released with https://github.com/pytest-dev/pytest/pull/12436 + "pytest>=8.4.2", # we need https://github.com/pytest-dev/pytest/pull/12436 "pytest-cov", "pytest-env", "pytest-xdist", @@ -88,18 +81,23 @@ def get_version() -> str: "urllib3<2.0", # VCR.py broken with urllib3 2.0 (see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html) "soundfile", "Pillow", - "gradio>=4.0.0", # to test webhooks # pin to avoid issue on Python3.12 + "requests", # for gradio "numpy", # for embeddings "fastapi", # To build the documentation ] ) +if sys.version_info >= (3, 10): + # We need gradio to test webhooks server + # But gradio 5.0+ only supports python 3.10+ so we don't want to test earlier versions + extras["testing"].append("gradio>=5.0.0") + extras["testing"].append("requests") # see https://github.com/gradio-app/gradio/pull/11830 + # Typing extra dependencies list is duplicated in `.pre-commit-config.yaml` # Please make sure to update the list there when adding a new typing dependency. 
extras["typing"] = [ "typing-extensions>=4.8.0", "types-PyYAML", - "types-requests", "types-simplejson", "types-toml", "types-tqdm", @@ -108,8 +106,7 @@ def get_version() -> str: extras["quality"] = [ "ruff>=0.9.0", - "mypy>=1.14.1,<1.15.0; python_version=='3.8'", - "mypy==1.15.0; python_version>='3.9'", + "mypy==1.15.0", "libcst>=1.4.0", "ty", ] @@ -134,13 +131,12 @@ def get_version() -> str: extras_require=extras, entry_points={ "console_scripts": [ - "huggingface-cli=huggingface_hub.commands.huggingface_cli:main", "hf=huggingface_hub.cli.hf:main", "tiny-agents=huggingface_hub.inference._mcp.cli:app", ], "fsspec.specs": "hf=huggingface_hub.HfFileSystem", }, - python_requires=">=3.8.0", + python_requires=">=3.9.0", install_requires=install_requires, classifiers=[ "Intended Audience :: Developers", @@ -150,7 +146,6 @@ def get_version() -> str: "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/src/huggingface_hub/README.md b/src/huggingface_hub/README.md index cd5c1e2beb..b0e5cd65d9 100644 --- a/src/huggingface_hub/README.md +++ b/src/huggingface_hub/README.md @@ -112,242 +112,3 @@ With the `HfApi` class there are methods to query models, datasets, and Spaces b - `space_info()` These lightly wrap around the API Endpoints. Documentation for valid parameters and descriptions can be found [here](https://huggingface.co/docs/hub/endpoints). - - -### Advanced programmatic repository management - -The `Repository` class helps manage both offline Git repositories and Hugging -Face Hub repositories. Using the `Repository` class requires `git` and `git-lfs` -to be installed. - -Instantiate a `Repository` object by calling it with a path to a local Git -clone/repository: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository("//") -``` - -The `Repository` takes a `clone_from` string as parameter. This can stay as -`None` for offline management, but can also be set to any URL pointing to a Git -repo to clone that repository in the specified directory: - -```python ->>> repo = Repository("huggingface-hub", clone_from="https://github.com/huggingface/huggingface_hub") -``` - -The `clone_from` method can also take any Hugging Face model ID as input, and -will clone that repository: - -```python ->>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -If the repository you're cloning is one of yours or one of your organisation's, then having the ability to commit and push to that repository is important. 
In order to do that, you should make sure to be logged-in using `hf auth login`,: - -```python ->>> repo = Repository("my-model", clone_from="/") -``` - -This works for models, datasets and spaces repositories; but you will need to -explicitely specify the type for the last two options: - -```python ->>> repo = Repository("my-dataset", clone_from="/", repo_type="dataset") -``` - -You can also change between branches: - -```python ->>> repo = Repository("huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -The `clone_from` method can also take any Hugging Face model ID as input, and -will clone that repository: - -```python ->>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -Finally, you can choose to specify the Git username and email attributed to that -clone directly by using the `git_user` and `git_email` parameters. When -committing to that repository, Git will therefore be aware of who you are and -who will be the author of the commits: - -```python ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -The repository can be managed through this object, through wrappers of -traditional Git methods: - -- `git_add(pattern: str, auto_lfs_track: bool)`. The `auto_lfs_track` flag - triggers auto tracking of large files (>10MB) with `git-lfs` -- `git_commit(commit_message: str)` -- `git_pull(rebase: bool)` -- `git_push()` -- `git_checkout(branch)` - -The `git_push` method has a parameter `blocking` which is `True` by default. When set to `False`, the push will -happen behind the scenes - which can be helpful if you would like your script to continue on while the push is -happening. - -LFS-tracking methods: - -- `lfs_track(pattern: Union[str, List[str]], filename: bool)`. Setting - `filename` to `True` will use the `--filename` parameter, which will consider - the pattern(s) as filenames, even if they contain special glob characters. -- `lfs_untrack()`. -- `auto_track_large_files()`: automatically tracks files that are larger than - 10MB. Make sure to call this after adding files to the index. - -On top of these unitary methods lie some useful additional methods: - -- `push_to_hub(commit_message)`: consecutively does `git_add`, `git_commit` and - `git_push`. -- `commit(commit_message: str, track_large_files: bool)`: this is a context - manager utility that handles committing to a repository. This automatically - tracks large files (>10Mb) with `git-lfs`. The `track_large_files` argument can - be set to `False` if you wish to ignore that behavior. - -These two methods also have support for the `blocking` parameter. - -Examples using the `commit` context manager: -```python ->>> with Repository("text-files", clone_from="/text-files").commit("My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model").commit("My cool model :)"): -... torch.save(model.state_dict(), "model.pt") - ``` - -### Non-blocking behavior - -The pushing methods have access to a `blocking` boolean parameter to indicate whether the push should happen -asynchronously. - -In order to see if the push has finished or its status code (to spot a failure), one should use the `command_queue` -property on the `Repository` object. 
- -For example: - -```python -from huggingface_hub import Repository - -repo = Repository("", clone_from="/") - -with repo.commit("Commit message", blocking=False): - # Save data - -last_command = repo.command_queue[-1] - -# Status of the push command -last_command.status -# Will return the status code -# -> -1 will indicate the push is still ongoing -# -> 0 will indicate the push has completed successfully -# -> non-zero code indicates the error code if there was an error - -# if there was an error, the stderr may be inspected -last_command.stderr - -# Whether the command finished or if it is still ongoing -last_command.is_done - -# Whether the command errored-out. -last_command.failed -``` - -When using `blocking=False`, the commands will be tracked and your script will exit only when all pushes are done, even -if other errors happen in your script (a failed push counts as done). - - -### Need to upload very large (>5GB) files? - -To upload large files (>5GB 🔥) from git command-line, you need to install the custom transfer agent -for git-lfs, bundled in this package. - -To install, just run: - -```bash -$ hf lfs-enable-largefiles . -``` - -This should be executed once for each model repo that contains a model file ->5GB. If you just try to push a file bigger than 5GB without running that -command, you will get an error with a message reminding you to run it. - -Finally, there's a `hf lfs-multipart-upload` command but that one -is internal (called by lfs directly) and is not meant to be called by the user. - -
- -## Using the Inference API wrapper - -`huggingface_hub` comes with a wrapper client to make calls to the Inference -API! You can find some examples below, but we encourage you to visit the -Inference API -[documentation](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html) -to review the specific parameters for the different tasks. - -When you instantiate the wrapper to the Inference API, you specify the model -repository id. The pipeline (`text-classification`, `text-to-speech`, etc) is -automatically extracted from the -[repository](https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined), -but you can also override it as shown below. - - -### Examples - -Here is a basic example of calling the Inference API for a `fill-mask` task -using the `bert-base-uncased` model. The `fill-mask` task only expects a string -(or list of strings) as input. - -```python -from huggingface_hub.inference_api import InferenceApi -inference = InferenceApi("bert-base-uncased", token=API_TOKEN) -inference(inputs="The goal of life is [MASK].") ->> [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -This is an example of a task (`question-answering`) which requires a dictionary -as input thas has the `question` and `context` keys. - -```python -inference = InferenceApi("deepset/roberta-base-squad2", token=API_TOKEN) -inputs = {"question":"What's my name?", "context":"My name is Clara and I live in Berkeley."} -inference(inputs) ->> {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'} -``` - -Some tasks might also require additional params in the request. Here is an -example using a `zero-shot-classification` model. - -```python -inference = InferenceApi("typeform/distilbert-base-uncased-mnli", token=API_TOKEN) -inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" -params = {"candidate_labels":["refund", "legal", "faq"]} -inference(inputs, params) ->> {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` - -Finally, there are some models that might support multiple tasks. For example, -`sentence-transformers` models can do `sentence-similarity` and -`feature-extraction`. You can override the configured task when initializing the -API. 
- -```python -inference = InferenceApi("bert-base-uncased", task="feature-extraction", token=API_TOKEN) -``` diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 2f1c10a873..2dde730333 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -46,7 +46,7 @@ from typing import TYPE_CHECKING -__version__ = "0.36.0.dev0" +__version__ = "1.0.0.rc4" # Alphabetical order of definitions is ensured in tests # WARNING: any comment added in this dictionary definition will be lost when @@ -138,6 +138,7 @@ "push_to_hub_fastai", ], "file_download": [ + "DryRunFileInfo", "HfFileMetadata", "_CACHED_NO_EXIST", "get_hf_file_metadata", @@ -217,7 +218,6 @@ "get_safetensors_metadata", "get_space_runtime", "get_space_variables", - "get_token_permission", "get_user_overview", "get_webhook", "grant_access", @@ -278,7 +278,6 @@ "update_collection_metadata", "update_inference_endpoint", "update_repo_settings", - "update_repo_visibility", "update_webhook", "upload_file", "upload_folder", @@ -471,15 +470,6 @@ "inference._mcp.mcp_client": [ "MCPClient", ], - "inference_api": [ - "InferenceApi", - ], - "keras_mixin": [ - "KerasModelHubMixin", - "from_pretrained_keras", - "push_to_hub_keras", - "save_pretrained_keras", - ], "repocard": [ "DatasetCard", "ModelCard", @@ -497,12 +487,8 @@ "ModelCardData", "SpaceCardData", ], - "repository": [ - "Repository", - ], "serialization": [ "StateDictSplit", - "get_tf_storage_size", "get_torch_storage_id", "get_torch_storage_size", "load_state_dict_from_file", @@ -510,7 +496,6 @@ "save_torch_model", "save_torch_state_dict", "split_state_dict_into_shards_factory", - "split_tf_state_dict_into_shards", "split_torch_state_dict_into_shards", ], "serialization._dduf": [ @@ -520,6 +505,8 @@ "read_dduf_file", ], "utils": [ + "ASYNC_CLIENT_FACTORY_T", + "CLIENT_FACTORY_T", "CacheNotFound", "CachedFileInfo", "CachedRepoInfo", @@ -527,14 +514,17 @@ "CorruptedCacheException", "DeleteCacheStrategy", "HFCacheInfo", - "HfFolder", "cached_assets_path", - "configure_http_backend", + "close_session", "dump_environment_info", + "get_async_session", "get_session", "get_token", + "hf_raise_for_status", "logging", "scan_cache_dir", + "set_async_client_factory", + "set_client_factory", ], } @@ -550,6 +540,7 @@ # ``` __all__ = [ + "ASYNC_CLIENT_FACTORY_T", "Agent", "AsyncInferenceClient", "AudioClassificationInput", @@ -564,6 +555,7 @@ "AutomaticSpeechRecognitionOutput", "AutomaticSpeechRecognitionOutputChunk", "AutomaticSpeechRecognitionParameters", + "CLIENT_FACTORY_T", "CONFIG_NAME", "CacheNotFound", "CachedFileInfo", @@ -632,6 +624,7 @@ "DocumentQuestionAnsweringInputData", "DocumentQuestionAnsweringOutputElement", "DocumentQuestionAnsweringParameters", + "DryRunFileInfo", "EvalResult", "FLAX_WEIGHTS_NAME", "FeatureExtractionInput", @@ -652,7 +645,6 @@ "HfFileSystemFile", "HfFileSystemResolvedPath", "HfFileSystemStreamFile", - "HfFolder", "ImageClassificationInput", "ImageClassificationOutputElement", "ImageClassificationOutputTransform", @@ -674,7 +666,6 @@ "ImageToVideoOutput", "ImageToVideoParameters", "ImageToVideoTargetSize", - "InferenceApi", "InferenceClient", "InferenceEndpoint", "InferenceEndpointError", @@ -686,7 +677,6 @@ "JobOwner", "JobStage", "JobStatus", - "KerasModelHubMixin", "MCPClient", "ModelCard", "ModelCardData", @@ -711,7 +701,6 @@ "REPO_TYPE_SPACE", "RepoCard", "RepoUrl", - "Repository", "SentenceSimilarityInput", "SentenceSimilarityInputData", "SpaceCard", @@ -824,8 +813,8 @@ "cancel_access_request", "cancel_job", 
"change_discussion_status", + "close_session", "comment_discussion", - "configure_http_backend", "create_branch", "create_collection", "create_commit", @@ -862,7 +851,7 @@ "fetch_job_logs", "file_exists", "from_pretrained_fastai", - "from_pretrained_keras", + "get_async_session", "get_collection", "get_dataset_tags", "get_discussion_details", @@ -876,9 +865,7 @@ "get_session", "get_space_runtime", "get_space_variables", - "get_tf_storage_size", "get_token", - "get_token_permission", "get_torch_storage_id", "get_torch_storage_size", "get_user_overview", @@ -886,6 +873,7 @@ "grant_access", "hf_hub_download", "hf_hub_url", + "hf_raise_for_status", "inspect_job", "inspect_scheduled_job", "interpreter_login", @@ -932,7 +920,6 @@ "permanently_delete_lfs_files", "preupload_lfs_files", "push_to_hub_fastai", - "push_to_hub_keras", "read_dduf_file", "reject_access_request", "rename_discussion", @@ -948,16 +935,16 @@ "run_as_future", "run_job", "run_uv_job", - "save_pretrained_keras", "save_torch_model", "save_torch_state_dict", "scale_to_zero_inference_endpoint", "scan_cache_dir", + "set_async_client_factory", + "set_client_factory", "set_space_sleep_time", "snapshot_download", "space_info", "split_state_dict_into_shards_factory", - "split_tf_state_dict_into_shards", "split_torch_state_dict_into_shards", "super_squash_history", "suspend_scheduled_job", @@ -967,7 +954,6 @@ "update_collection_metadata", "update_inference_endpoint", "update_repo_settings", - "update_repo_visibility", "update_webhook", "upload_file", "upload_folder", @@ -1159,6 +1145,7 @@ def __dir__(): ) from .file_download import ( _CACHED_NO_EXIST, # noqa: F401 + DryRunFileInfo, # noqa: F401 HfFileMetadata, # noqa: F401 get_hf_file_metadata, # noqa: F401 hf_hub_download, # noqa: F401 @@ -1237,7 +1224,6 @@ def __dir__(): get_safetensors_metadata, # noqa: F401 get_space_runtime, # noqa: F401 get_space_variables, # noqa: F401 - get_token_permission, # noqa: F401 get_user_overview, # noqa: F401 get_webhook, # noqa: F401 grant_access, # noqa: F401 @@ -1298,7 +1284,6 @@ def __dir__(): update_collection_metadata, # noqa: F401 update_inference_endpoint, # noqa: F401 update_repo_settings, # noqa: F401 - update_repo_visibility, # noqa: F401 update_webhook, # noqa: F401 upload_file, # noqa: F401 upload_folder, # noqa: F401 @@ -1485,13 +1470,6 @@ def __dir__(): ) from .inference._mcp.agent import Agent # noqa: F401 from .inference._mcp.mcp_client import MCPClient # noqa: F401 - from .inference_api import InferenceApi # noqa: F401 - from .keras_mixin import ( - KerasModelHubMixin, # noqa: F401 - from_pretrained_keras, # noqa: F401 - push_to_hub_keras, # noqa: F401 - save_pretrained_keras, # noqa: F401 - ) from .repocard import ( DatasetCard, # noqa: F401 ModelCard, # noqa: F401 @@ -1509,10 +1487,8 @@ def __dir__(): ModelCardData, # noqa: F401 SpaceCardData, # noqa: F401 ) - from .repository import Repository # noqa: F401 from .serialization import ( StateDictSplit, # noqa: F401 - get_tf_storage_size, # noqa: F401 get_torch_storage_id, # noqa: F401 get_torch_storage_size, # noqa: F401 load_state_dict_from_file, # noqa: F401 @@ -1520,7 +1496,6 @@ def __dir__(): save_torch_model, # noqa: F401 save_torch_state_dict, # noqa: F401 split_state_dict_into_shards_factory, # noqa: F401 - split_tf_state_dict_into_shards, # noqa: F401 split_torch_state_dict_into_shards, # noqa: F401 ) from .serialization._dduf import ( @@ -1530,6 +1505,8 @@ def __dir__(): read_dduf_file, # noqa: F401 ) from .utils import ( + ASYNC_CLIENT_FACTORY_T, # noqa: F401 + 
CLIENT_FACTORY_T, # noqa: F401 CachedFileInfo, # noqa: F401 CachedRepoInfo, # noqa: F401 CachedRevisionInfo, # noqa: F401 @@ -1537,12 +1514,15 @@ def __dir__(): CorruptedCacheException, # noqa: F401 DeleteCacheStrategy, # noqa: F401 HFCacheInfo, # noqa: F401 - HfFolder, # noqa: F401 cached_assets_path, # noqa: F401 - configure_http_backend, # noqa: F401 + close_session, # noqa: F401 dump_environment_info, # noqa: F401 + get_async_session, # noqa: F401 get_session, # noqa: F401 get_token, # noqa: F401 + hf_raise_for_status, # noqa: F401 logging, # noqa: F401 scan_cache_dir, # noqa: F401 + set_async_client_factory, # noqa: F401 + set_client_factory, # noqa: F401 ) diff --git a/src/huggingface_hub/_commit_api.py b/src/huggingface_hub/_commit_api.py index 7ed64b0e5e..ecd7e0a2b5 100644 --- a/src/huggingface_hub/_commit_api.py +++ b/src/huggingface_hub/_commit_api.py @@ -11,7 +11,7 @@ from dataclasses import dataclass, field from itertools import groupby from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, Literal, Optional, Union from tqdm.contrib.concurrent import thread_map @@ -236,7 +236,7 @@ def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]: config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s] >>> with operation.as_file(with_tqdm=True) as file: - ... requests.put(..., data=file) + ... httpx.put(..., data=file) config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s] ``` """ @@ -307,7 +307,7 @@ def _validate_path_in_repo(path_in_repo: str) -> str: CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete] -def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: +def _warn_on_overwriting_operations(operations: list[CommitOperation]) -> None: """ Warn user when a list of operations is expected to overwrite itself in a single commit. @@ -322,7 +322,7 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: delete before upload) but can happen if a user deletes an entire folder and then add new files to it. """ - nb_additions_per_path: Dict[str, int] = defaultdict(int) + nb_additions_per_path: dict[str, int] = defaultdict(int) for operation in operations: path_in_repo = operation.path_in_repo if isinstance(operation, CommitOperationAdd): @@ -356,10 +356,10 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: @validate_hf_hub_args def _upload_files( *, - additions: List[CommitOperationAdd], + additions: list[CommitOperationAdd], repo_type: str, repo_id: str, - headers: Dict[str, str], + headers: dict[str, str], endpoint: Optional[str] = None, num_threads: int = 5, revision: Optional[str] = None, @@ -368,14 +368,14 @@ def _upload_files( """ Negotiates per-file transfer (LFS vs Xet) and uploads in batches. 
""" - xet_additions: List[CommitOperationAdd] = [] - lfs_actions: List[Dict] = [] - lfs_oid2addop: Dict[str, CommitOperationAdd] = {} + xet_additions: list[CommitOperationAdd] = [] + lfs_actions: list[dict[str, Any]] = [] + lfs_oid2addop: dict[str, CommitOperationAdd] = {} for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES): chunk_list = [op for op in chunk] - transfers: List[str] = ["basic", "multipart"] + transfers: list[str] = ["basic", "multipart"] has_buffered_io_data = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in chunk_list) if is_xet_available(): if not has_buffered_io_data: @@ -438,9 +438,9 @@ def _upload_files( @validate_hf_hub_args def _upload_lfs_files( *, - actions: List[Dict], - oid2addop: Dict[str, CommitOperationAdd], - headers: Dict[str, str], + actions: list[dict[str, Any]], + oid2addop: dict[str, CommitOperationAdd], + headers: dict[str, str], endpoint: Optional[str] = None, num_threads: int = 5, ): @@ -451,11 +451,11 @@ def _upload_lfs_files( - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md Args: - actions (`List[Dict]`): + actions (`list[dict[str, Any]]`): LFS batch actions returned by the server. - oid2addop (`Dict[str, CommitOperationAdd]`): + oid2addop (`dict[str, CommitOperationAdd]`): A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. endpoint (`str`, *optional*): The endpoint to use for the request. Defaults to `constants.ENDPOINT`. @@ -470,7 +470,7 @@ def _upload_lfs_files( repo_id (`str`): A namespace (user or an organization) and a repo name separated by a `/`. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. num_threads (`int`, *optional*): The number of concurrent threads to use when uploading. Defaults to 5. @@ -482,7 +482,7 @@ def _upload_lfs_files( If an upload failed for any reason [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) If the server returns malformed responses - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + [`HfHubHTTPError`] If the LFS batch endpoint returned an HTTP error. """ # Filter out files already present upstream @@ -526,10 +526,10 @@ def _wrapped_lfs_upload(batch_action) -> None: @validate_hf_hub_args def _upload_xet_files( *, - additions: List[CommitOperationAdd], + additions: list[CommitOperationAdd], repo_type: str, repo_id: str, - headers: Dict[str, str], + headers: dict[str, str], endpoint: Optional[str] = None, revision: Optional[str] = None, create_pr: Optional[bool] = None, @@ -539,14 +539,14 @@ def _upload_xet_files( This chunks the files and deduplicates the chunks before uploading them to xetcas storage. Args: - additions (`List` of `CommitOperationAdd`): + additions (`` of `CommitOperationAdd`): The files to be uploaded. repo_type (`str`): Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`. repo_id (`str`): A namespace (user or an organization) and a repo name separated by a `/`. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. endpoint: (`str`, *optional*): The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`. @@ -560,7 +560,7 @@ def _upload_xet_files( If an upload failed for any reason. 
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
            If the LFS batch endpoint returned an HTTP error.

    **How it works:**
@@ -615,7 +615,7 @@ def _upload_xet_files(
     xet_endpoint = xet_connection_info.endpoint
     access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)

-    def token_refresher() -> Tuple[str, int]:
+    def token_refresher() -> tuple[str, int]:
         new_xet_connection = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
             repo_id=repo_id,
@@ -688,7 +688,7 @@ def _fetch_upload_modes(
     additions: Iterable[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
     create_pr: bool = False,
@@ -707,7 +707,7 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
@@ -725,12 +725,12 @@
     endpoint = endpoint if endpoint is not None else constants.ENDPOINT

     # Fetch upload mode (LFS or regular) chunk by chunk.
-    upload_modes: Dict[str, UploadMode] = {}
-    should_ignore_info: Dict[str, bool] = {}
-    oid_info: Dict[str, Optional[str]] = {}
+    upload_modes: dict[str, UploadMode] = {}
+    should_ignore_info: dict[str, bool] = {}
+    oid_info: dict[str, Optional[str]] = {}

     for chunk in chunk_iterable(additions, 256):
-        payload: Dict = {
+        payload: dict = {
             "files": [
                 {
                     "path": op.path_in_repo,
@@ -773,10 +773,10 @@
     copies: Iterable[CommitOperationCopy],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
-) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+) -> dict[tuple[str, Optional[str]], Union["RepoFile", bytes]]:
     """
     Fetch information about the files to copy.
@@ -792,12 +792,12 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.

-    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
+    Returns: `dict[tuple[str, Optional[str]], Union[RepoFile, bytes]]`
         Key is the file path and revision of the file to copy.
         Value is the raw content as bytes (for regular files) or the file information as a RepoFile
         (for LFS files).
@@ -810,9 +810,9 @@
     from .hf_api import HfApi, RepoFolder

     hf_api = HfApi(endpoint=endpoint, headers=headers)
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
     # Store (path, revision) -> oid mapping
-    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}
+    oid_info: dict[tuple[str, Optional[str]], Optional[str]] = {}

     # 1. Fetch OIDs for destination paths in batches.
dest_paths = [op.path_in_repo for op in copies] for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE): @@ -872,11 +872,11 @@ def _fetch_files_to_copy( def _prepare_commit_payload( operations: Iterable[CommitOperation], - files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]], + files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]], commit_message: str, commit_description: Optional[str] = None, parent_commit: Optional[str] = None, -) -> Iterable[Dict[str, Any]]: +) -> Iterable[dict[str, Any]]: """ Builds the payload to POST to the `/commit` API of the Hub. diff --git a/src/huggingface_hub/_commit_scheduler.py b/src/huggingface_hub/_commit_scheduler.py index 1bc8db6a8a..497c9a0be5 100644 --- a/src/huggingface_hub/_commit_scheduler.py +++ b/src/huggingface_hub/_commit_scheduler.py @@ -7,7 +7,7 @@ from io import SEEK_END, SEEK_SET, BytesIO from pathlib import Path from threading import Lock, Thread -from typing import Dict, List, Optional, Union +from typing import Optional, Union from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi from .utils import filter_repo_objects @@ -53,9 +53,9 @@ class CommitScheduler: Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. token (`str`, *optional*): The token to use to commit to the repo. Defaults to the token saved on the machine. - allow_patterns (`List[str]` or `str`, *optional*): + allow_patterns (`list[str]` or `str`, *optional*): If provided, only files matching at least one pattern are uploaded. - ignore_patterns (`List[str]` or `str`, *optional*): + ignore_patterns (`list[str]` or `str`, *optional*): If provided, files matching any of the patterns are not uploaded. squash_history (`bool`, *optional*): Whether to squash the history of the repo after each commit. Defaults to `False`. 
Squashing commits is @@ -108,8 +108,8 @@ def __init__( revision: Optional[str] = None, private: Optional[bool] = None, token: Optional[str] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, squash_history: bool = False, hf_api: Optional["HfApi"] = None, ) -> None: @@ -138,7 +138,7 @@ def __init__( self.token = token # Keep track of already uploaded files - self.last_uploaded: Dict[Path, float] = {} # key is local path, value is timestamp + self.last_uploaded: dict[Path, float] = {} # key is local path, value is timestamp # Scheduler if not every > 0: @@ -229,7 +229,7 @@ def push_to_hub(self) -> Optional[CommitInfo]: prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else "" # Filter with pattern + filter out unchanged files + retrieve current file size - files_to_upload: List[_FileToUpload] = [] + files_to_upload: list[_FileToUpload] = [] for relpath in filter_repo_objects( relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns ): @@ -312,10 +312,13 @@ def __len__(self) -> int: return self._size_limit def __getattribute__(self, name: str): - if name.startswith("_") or name in ("read", "tell", "seek"): # only 3 public methods supported + if name.startswith("_") or name in ("read", "tell", "seek", "fileno"): # only 4 public methods supported return super().__getattribute__(name) raise NotImplementedError(f"PartialFileIO does not support '{name}'.") + def fileno(self): + raise AttributeError("PartialFileIO does not have a fileno.") + def tell(self) -> int: """Return the current file position.""" return self._file.tell() diff --git a/src/huggingface_hub/_inference_endpoints.py b/src/huggingface_hub/_inference_endpoints.py index 37f772bfbe..4422cac7c3 100644 --- a/src/huggingface_hub/_inference_endpoints.py +++ b/src/huggingface_hub/_inference_endpoints.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum -from typing import TYPE_CHECKING, Dict, Optional, Union +from typing import TYPE_CHECKING, Optional, Union from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError @@ -62,7 +62,7 @@ class InferenceEndpoint: The timestamp of the last update of the Inference Endpoint. type ([`InferenceEndpointType`]): The type of the Inference Endpoint (public, protected, private). - raw (`Dict`): + raw (`dict`): The raw dictionary data returned from the API. token (`str` or `bool`, *optional*): Authentication token for the Inference Endpoint, if set when requesting the API. 
Will default to the @@ -112,7 +112,7 @@ class InferenceEndpoint: type: InferenceEndpointType = field(repr=False, init=False) # Raw dict from the API - raw: Dict = field(repr=False) + raw: dict = field(repr=False) # Internal fields _token: Union[str, bool, None] = field(repr=False, compare=False) @@ -120,7 +120,7 @@ class InferenceEndpoint: @classmethod def from_raw( - cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None + cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None ) -> "InferenceEndpoint": """Initialize object from raw dictionary.""" if api is None: @@ -260,8 +260,8 @@ def update( framework: Optional[str] = None, revision: Optional[str] = None, task: Optional[str] = None, - custom_image: Optional[Dict] = None, - secrets: Optional[Dict[str, str]] = None, + custom_image: Optional[dict] = None, + secrets: Optional[dict[str, str]] = None, ) -> "InferenceEndpoint": """Update the Inference Endpoint. @@ -293,10 +293,10 @@ def update( The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). task (`str`, *optional*): The task on which to deploy the model (e.g. `"text-classification"`). - custom_image (`Dict`, *optional*): + custom_image (`dict`, *optional*): A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). - secrets (`Dict[str, str]`, *optional*): + secrets (`dict[str, str]`, *optional*): Secret values to inject in the container environment. Returns: [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 623fd9dc9d..c85324ce1c 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from huggingface_hub import constants from huggingface_hub._space_api import SpaceHardware @@ -71,13 +71,13 @@ class JobInfo: space_id (`str` or `None`): The Docker image from Hugging Face Spaces used for the Job. Can be None if docker_image is present instead. - command (`List[str]` or `None`): + command (`list[str]` or `None`): Command of the Job, e.g. `["python", "-c", "print('hello world')"]` - arguments (`List[str]` or `None`): + arguments (`list[str]` or `None`): Arguments passed to the command - environment (`Dict[str]` or `None`): + environment (`dict[str]` or `None`): Environment variables of the Job as a dictionary. - secrets (`Dict[str]` or `None`): + secrets (`dict[str]` or `None`): Secret environment variables of the Job (encrypted). flavor (`str` or `None`): Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. 
@@ -111,10 +111,10 @@ class JobInfo: created_at: Optional[datetime] docker_image: Optional[str] space_id: Optional[str] - command: Optional[List[str]] - arguments: Optional[List[str]] - environment: Optional[Dict[str, Any]] - secrets: Optional[Dict[str, Any]] + command: Optional[list[str]] + arguments: Optional[list[str]] + environment: Optional[dict[str, Any]] + secrets: Optional[dict[str, Any]] flavor: Optional[SpaceHardware] status: JobStatus owner: JobOwner @@ -148,13 +148,13 @@ def __init__(self, **kwargs) -> None: class JobSpec: docker_image: Optional[str] space_id: Optional[str] - command: Optional[List[str]] - arguments: Optional[List[str]] - environment: Optional[Dict[str, Any]] - secrets: Optional[Dict[str, Any]] + command: Optional[list[str]] + arguments: Optional[list[str]] + environment: Optional[dict[str, Any]] + secrets: Optional[dict[str, Any]] flavor: Optional[SpaceHardware] timeout: Optional[int] - tags: Optional[List[str]] + tags: Optional[list[str]] arch: Optional[str] def __init__(self, **kwargs) -> None: @@ -202,7 +202,7 @@ class ScheduledJobInfo: Scheduled Job ID. created_at (`datetime` or `None`): When the scheduled Job was created. - tags (`List[str]` or `None`): + tags (`list[str]` or `None`): The tags of the scheduled Job. schedule (`str` or `None`): One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a @@ -263,14 +263,14 @@ def __init__(self, **kwargs) -> None: def _create_job_spec( *, image: str, - command: List[str], - env: Optional[Dict[str, Any]], - secrets: Optional[Dict[str, Any]], + command: list[str], + env: Optional[dict[str, Any]], + secrets: Optional[dict[str, Any]], flavor: Optional[SpaceHardware], timeout: Optional[Union[int, float, str]], -) -> Dict[str, Any]: +) -> dict[str, Any]: # prepare job spec to send to HF Jobs API - job_spec: Dict[str, Any] = { + job_spec: dict[str, Any] = { "command": command, "arguments": [], "environment": env or {}, diff --git a/src/huggingface_hub/_login.py b/src/huggingface_hub/_login.py index 8f721b6834..7700ab5b23 100644 --- a/src/huggingface_hub/_login.py +++ b/src/huggingface_hub/_login.py @@ -20,8 +20,8 @@ from typing import Optional from . import constants -from .commands._cli_utils import ANSI from .utils import ( + ANSI, capture_output, get_token, is_google_colab, @@ -41,7 +41,6 @@ _save_token, get_stored_tokens, ) -from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args logger = logging.get_logger(__name__) @@ -55,18 +54,11 @@ """ -@_deprecate_arguments( - version="1.0", - deprecated_args="write_permission", - custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", -) -@_deprecate_positional_args(version="1.0") def login( token: Optional[str] = None, *, add_to_git_credential: bool = False, - new_session: bool = True, - write_permission: bool = False, + skip_if_logged_in: bool = False, ) -> None: """Login the machine to access the Hub. @@ -96,10 +88,8 @@ def login( is configured, a warning will be displayed to the user. If `token` is `None`, the value of `add_to_git_credential` is ignored and will be prompted again to the end user. - new_session (`bool`, defaults to `True`): - If `True`, will request a token even if one is already saved on the machine. - write_permission (`bool`): - Ignored and deprecated argument. + skip_if_logged_in (`bool`, defaults to `False`): + If `True`, do not prompt for token if user is already logged in. 
Raises:
     [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
         If an organization token is passed. Only personal account tokens are valid
@@ -119,9 +109,9 @@
         )
         _login(token, add_to_git_credential=add_to_git_credential)
     elif is_notebook():
-        notebook_login(new_session=new_session)
+        notebook_login(skip_if_logged_in=skip_if_logged_in)
     else:
-        interpreter_login(new_session=new_session)
+        interpreter_login(skip_if_logged_in=skip_if_logged_in)


 def logout(token_name: Optional[str] = None) -> None:
@@ -236,13 +226,7 @@ def auth_list() -> None:
 ###


-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a prompt to log in to the HF website and store the token.

@@ -253,16 +237,14 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
     For more details, see [`login`].

     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
-    if not new_session and get_token() is not None:
+    if skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return

-    from .commands.delete_cache import _ask_for_confirmation_no_tui
+    from .cli.cache import _ask_for_confirmation_no_tui

     print(_HF_LOGO_ASCII)
     if get_token() is not None:
@@ -308,13 +290,7 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
 notebooks.
 """


-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def notebook_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a widget to log in to the HF website and store the token.

@@ -325,10 +301,8 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
     For more details, see [`login`].

     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
     try:
         import ipywidgets.widgets as widgets  # type: ignore
@@ -338,7 +312,7 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
         "The `notebook_login` function can only be used in a notebook (Jupyter or"
         " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
 )
-    if not new_session and get_token() is not None:
+    if skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return

diff --git a/src/huggingface_hub/_oauth.py b/src/huggingface_hub/_oauth.py
index 9f8eb60796..7bdfa6a058 100644
--- a/src/huggingface_hub/_oauth.py
+++ b/src/huggingface_hub/_oauth.py
@@ -6,7 +6,7 @@
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Literal, Optional, Union

 from . import constants
 from .hf_api import whoami
@@ -39,7 +39,7 @@ class OAuthOrgInfo:
         Whether the org has a payment method set up. Hugging Face field.
     role_in_org (`Optional[str]`, *optional*):
         The user's role in the org. Hugging Face field.
-    security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+    security_restrictions (`Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
         Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """

@@ -50,7 +50,7 @@
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
+    security_restrictions: Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]] = None


 @dataclass
@@ -79,7 +79,7 @@ class OAuthUserInfo:
         Whether the user is a pro user. Hugging Face field.
     can_pay (`Optional[bool]`, *optional*):
         Whether the user has a payment method set up. Hugging Face field.
-    orgs (`Optional[List[OrgInfo]]`, *optional*):
+    orgs (`Optional[list[OrgInfo]]`, *optional*):
         List of organizations the user is part of. Hugging Face field.
""" @@ -93,7 +93,7 @@ class OAuthUserInfo: website: Optional[str] is_pro: bool can_pay: Optional[bool] - orgs: Optional[List[OAuthOrgInfo]] + orgs: Optional[list[OAuthOrgInfo]] @dataclass @@ -306,7 +306,7 @@ async def oauth_redirect_callback(request: fastapi.Request) -> RedirectResponse: target_url = request.query_params.get("_target_url") # Build redirect URI with the same query params as before and bump nb_redirects count - query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1} + query_params: dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1} if target_url: query_params["_target_url"] = target_url @@ -406,7 +406,7 @@ def _get_redirect_target(request: "fastapi.Request", default_target: str = "/") return request.query_params.get("_target_url", default_target) -def _get_mocked_oauth_info() -> Dict: +def _get_mocked_oauth_info() -> dict: token = get_token() if token is None: raise ValueError( @@ -449,7 +449,7 @@ def _get_mocked_oauth_info() -> Dict: } -def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]: +def _get_oauth_uris(route_prefix: str = "/") -> tuple[str, str, str]: route_prefix = route_prefix.strip("/") if route_prefix: route_prefix = f"/{route_prefix}" diff --git a/src/huggingface_hub/_snapshot_download.py b/src/huggingface_hub/_snapshot_download.py index 0db8a29f7e..9b5d5cfbff 100644 --- a/src/huggingface_hub/_snapshot_download.py +++ b/src/huggingface_hub/_snapshot_download.py @@ -1,20 +1,21 @@ import os from pathlib import Path -from typing import Dict, Iterable, List, Literal, Optional, Type, Union +from typing import Iterable, List, Literal, Optional, Union, overload -import requests +import httpx from tqdm.auto import tqdm as base_tqdm from tqdm.contrib.concurrent import thread_map from . import constants from .errors import ( + DryRunError, GatedRepoError, HfHubHTTPError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError, ) -from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name +from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args from .utils import tqdm as hf_tqdm @@ -25,6 +26,81 @@ VERY_LARGE_REPO_THRESHOLD = 50000 # After this limit, we don't consider `repo_info.siblings` to be reliable enough +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: Literal[False] = False, +) -> str: ... 
+ + +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: Literal[True] = True, +) -> list[DryRunFileInfo]: ... + + +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: bool = False, +) -> Union[str, list[DryRunFileInfo]]: ... + + @validate_hf_hub_args def snapshot_download( repo_id: str, @@ -35,22 +111,19 @@ def snapshot_download( local_dir: Union[str, Path, None] = None, library_name: Optional[str] = None, library_version: Optional[str] = None, - user_agent: Optional[Union[Dict, str]] = None, - proxies: Optional[Dict] = None, + user_agent: Optional[Union[dict, str]] = None, etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, force_download: bool = False, token: Optional[Union[bool, str]] = None, local_files_only: bool = False, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, max_workers: int = 8, - tqdm_class: Optional[Type[base_tqdm]] = None, - headers: Optional[Dict[str, str]] = None, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, endpoint: Optional[str] = None, - # Deprecated args - local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", - resume_download: Optional[bool] = None, -) -> str: + dry_run: bool = False, +) -> Union[str, list[DryRunFileInfo]]: """Download repo files. Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from @@ -85,12 +158,9 @@ def snapshot_download( The version of the library. user_agent (`str`, `dict`, *optional*): The user-agent info in the form of a dictionary or a string. - proxies (`dict`, *optional*): - Dictionary mapping protocol to the URL of the proxy passed to - `requests.request`. etag_timeout (`float`, *optional*, defaults to `10`): When fetching ETag, how many seconds to wait for the server to send - data before giving up which is passed to `requests.request`. + data before giving up which is passed to `httpx.request`. 
force_download (`bool`, *optional*, defaults to `False`): Whether the file should be downloaded even if it already exists in the local cache. token (`str`, `bool`, *optional*): @@ -103,9 +173,9 @@ def snapshot_download( local_files_only (`bool`, *optional*, defaults to `False`): If `True`, avoid downloading the file and return the path to the local cached file if it exists. - allow_patterns (`List[str]` or `str`, *optional*): + allow_patterns (`list[str]` or `str`, *optional*): If provided, only files matching at least one pattern are downloaded. - ignore_patterns (`List[str]` or `str`, *optional*): + ignore_patterns (`list[str]` or `str`, *optional*): If provided, files matching any of the patterns are not downloaded. max_workers (`int`, *optional*): Number of concurrent threads to download files (1 thread = 1 file download). @@ -116,9 +186,14 @@ def snapshot_download( Note that the `tqdm_class` is not passed to each individual download. Defaults to the custom HF progress bar that can be disabled by setting `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + dry_run (`bool`, *optional*, defaults to `False`): + If `True`, perform a dry run without actually downloading the files. Returns a list of + [`DryRunFileInfo`] objects containing information about what would be downloaded. Returns: - `str`: folder path of the repo snapshot. + `str` or list of [`DryRunFileInfo`]: + - If `dry_run=False`: Local snapshot path. + - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information. Raises: [`~utils.RepositoryNotFoundError`] @@ -163,14 +238,10 @@ def snapshot_download( try: # if we have internet connection we want to list files to download repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision) - except (requests.exceptions.SSLError, requests.exceptions.ProxyError): - # Actually raise for those subclasses of ConnectionError + except httpx.ProxyError: + # Actually raise on proxy error raise - except ( - requests.exceptions.ConnectionError, - requests.exceptions.Timeout, - OfflineModeIsEnabled, - ) as error: + except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error: # Internet connection is down # => will try to use local files only api_call_error = error @@ -178,7 +249,7 @@ def snapshot_download( except RevisionNotFoundError: # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted) raise - except requests.HTTPError as error: + except HfHubHTTPError as error: # Multiple reasons for an http error: # - Repository is private and invalid/missing token sent # - Repository is gated and invalid/missing token sent @@ -198,6 +269,11 @@ def snapshot_download( # - f the specified revision is a branch or tag, look inside "refs". # => if local_dir is not None, we will return the path to the local folder if it exists. if repo_info is None: + if dry_run: + raise DryRunError( + "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token." + ) from api_call_error + # Try to get which commit hash corresponds to the specified revision commit_hash = None if REGEX_COMMIT_HASH.match(revision): @@ -284,6 +360,8 @@ def snapshot_download( tqdm_desc = f"Fetching {len(filtered_repo_files)} files" else: tqdm_desc = "Fetching ... 
files" + if dry_run: + tqdm_desc = "[dry-run] " + tqdm_desc commit_hash = repo_info.sha snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) @@ -299,31 +377,33 @@ def snapshot_download( except OSError as e: logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.") + results: List[Union[str, DryRunFileInfo]] = [] + # we pass the commit_hash to hf_hub_download # so no network call happens if we already # have the file locally. - def _inner_hf_hub_download(repo_file: str): - return hf_hub_download( - repo_id, - filename=repo_file, - repo_type=repo_type, - revision=commit_hash, - endpoint=endpoint, - cache_dir=cache_dir, - local_dir=local_dir, - local_dir_use_symlinks=local_dir_use_symlinks, - library_name=library_name, - library_version=library_version, - user_agent=user_agent, - proxies=proxies, - etag_timeout=etag_timeout, - resume_download=resume_download, - force_download=force_download, - token=token, - headers=headers, + def _inner_hf_hub_download(repo_file: str) -> None: + results.append( + hf_hub_download( # type: ignore[no-matching-overload] # ty not happy, don't know why :/ + repo_id, + filename=repo_file, + repo_type=repo_type, + revision=commit_hash, + endpoint=endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + etag_timeout=etag_timeout, + force_download=force_download, + token=token, + headers=headers, + dry_run=dry_run, + ) ) - if constants.HF_HUB_ENABLE_HF_TRANSFER: + if constants.HF_HUB_ENABLE_HF_TRANSFER and not dry_run: # when using hf_transfer we don't want extra parallelism # from the one hf_transfer provides for file in filtered_repo_files: @@ -338,6 +418,10 @@ def _inner_hf_hub_download(repo_file: str): tqdm_class=tqdm_class or hf_tqdm, ) + if dry_run: + assert all(isinstance(r, DryRunFileInfo) for r in results) + return results # type: ignore + if local_dir is not None: return str(os.path.realpath(local_dir)) return snapshot_folder diff --git a/src/huggingface_hub/_space_api.py b/src/huggingface_hub/_space_api.py index 05fccfbc1e..6dd7976329 100644 --- a/src/huggingface_hub/_space_api.py +++ b/src/huggingface_hub/_space_api.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from typing import Dict, Optional +from typing import Optional from huggingface_hub.utils import parse_datetime @@ -128,9 +128,9 @@ class SpaceRuntime: requested_hardware: Optional[SpaceHardware] sleep_time: Optional[int] storage: Optional[SpaceStorage] - raw: Dict + raw: dict - def __init__(self, data: Dict) -> None: + def __init__(self, data: dict) -> None: self.stage = data["stage"] self.hardware = data.get("hardware", {}).get("current") self.requested_hardware = data.get("hardware", {}).get("requested") @@ -160,7 +160,7 @@ class SpaceVariable: description: Optional[str] updated_at: Optional[datetime] - def __init__(self, key: str, values: Dict) -> None: + def __init__(self, key: str, values: dict) -> None: self.key = key self.value = values["value"] self.description = values.get("description") diff --git a/src/huggingface_hub/_tensorboard_logger.py b/src/huggingface_hub/_tensorboard_logger.py index 4d9581d8ee..2783a25001 100644 --- a/src/huggingface_hub/_tensorboard_logger.py +++ b/src/huggingface_hub/_tensorboard_logger.py @@ -14,7 +14,7 @@ """Contains a logger to push training logs to the Hub, using Tensorboard.""" from pathlib import Path -from typing import List, Optional, Union +from typing import 
Optional, Union from ._commit_scheduler import CommitScheduler from .errors import EntryNotFoundError @@ -74,10 +74,10 @@ class HFSummaryWriter(_RuntimeSummaryWriter): Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. path_in_repo (`str`, *optional*): The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/". - repo_allow_patterns (`List[str]` or `str`, *optional*): + repo_allow_patterns (`list[str]` or `str`, *optional*): A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details. - repo_ignore_patterns (`List[str]` or `str`, *optional*): + repo_ignore_patterns (`list[str]` or `str`, *optional*): A list of patterns to exclude in the upload. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details. token (`str`, *optional*): @@ -134,8 +134,8 @@ def __init__( repo_revision: Optional[str] = None, repo_private: Optional[bool] = None, path_in_repo: Optional[str] = "tensorboard", - repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*", - repo_ignore_patterns: Optional[Union[List[str], str]] = None, + repo_allow_patterns: Optional[Union[list[str], str]] = "*.tfevents.*", + repo_ignore_patterns: Optional[Union[list[str], str]] = None, token: Optional[str] = None, **kwargs, ): diff --git a/src/huggingface_hub/_upload_large_folder.py b/src/huggingface_hub/_upload_large_folder.py index 1ccbc07d39..083b62f544 100644 --- a/src/huggingface_hub/_upload_large_folder.py +++ b/src/huggingface_hub/_upload_large_folder.py @@ -24,15 +24,14 @@ from datetime import datetime from pathlib import Path from threading import Lock -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Optional, Union from urllib.parse import quote from . import constants from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata from .constants import DEFAULT_REVISION, REPO_TYPES -from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm -from .utils._cache_manager import _format_size +from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm from .utils._runtime import is_xet_available from .utils.sha import sha_fileobj @@ -44,7 +43,7 @@ WAITING_TIME_IF_NO_TASKS = 10 # seconds MAX_NB_FILES_FETCH_UPLOAD_MODE = 100 -COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000] +COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000] UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload @@ -56,7 +55,7 @@ RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size -def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None: +def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None: """ Validate upload against repository limits and warn about potential issues. 
@@ -85,7 +84,7 @@ def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None: # Track immediate children (files and subdirs) for each folder from collections import defaultdict - entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()}) + entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()}) for paths in paths_list: path = Path(paths.path_in_repo) @@ -160,8 +159,8 @@ def upload_large_folder_internal( repo_type: str, # Repo type is required! revision: Optional[str] = None, private: Optional[bool] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, num_workers: Optional[int] = None, print_report: bool = True, print_report_every: int = 60, @@ -284,13 +283,13 @@ class WorkerJob(enum.Enum): WAIT = enum.auto() # if no tasks are available but we don't want to exit -JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata] +JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata] class LargeUploadStatus: """Contains information, queues and tasks for a large upload process.""" - def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1): + def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1): self.items = items self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue() self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue() @@ -423,7 +422,7 @@ def _worker_job( Read `upload_large_folder` docstring for more information on how tasks are prioritized. """ while True: - next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None + next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None # Determine next task next_job = _determine_next_job(status) @@ -516,7 +515,7 @@ def _worker_job( status.nb_workers_waiting -= 1 -def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]: +def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]: with status.lock: # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file) if ( @@ -639,7 +638,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None: metadata.save(paths) -def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Get upload mode for each file and update metadata. Also receive info if the file should be ignored. 
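# --- Editorial aside (not part of the patch) ---------------------------------
# The pervasive `List`/`Dict`/`Tuple` -> `list`/`dict`/`tuple` changes in these
# hunks are the PEP 585 migration: since Python 3.9 the builtin containers are
# subscriptable as generics, so the `typing` aliases can be dropped. A minimal
# before/after illustration:
#
from typing import Optional  # still needed for Optional (and Union)

counts: dict[str, int] = {}        # previously: Dict[str, int]
batch: list[tuple[str, int]] = []  # previously: List[Tuple[str, int]]
maybe_rev: Optional[str] = None    # unchanged by the migration
# ------------------------------------------------------------------------------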
@@ -661,7 +660,7 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t metadata.save(paths) -def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Preupload LFS files and update metadata.""" additions = [_build_hacky_operation(item) for item in items] api.preupload_lfs_files( @@ -676,7 +675,7 @@ def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_typ metadata.save(paths) -def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Commit files to the repo.""" additions = [_build_hacky_operation(item) for item in items] api.create_commit( @@ -721,11 +720,11 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd: #################### -def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]: +def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]: return [queue.get()] -def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]: +def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]: return [queue.get() for _ in range(min(queue.qsize(), n))] diff --git a/src/huggingface_hub/_webhooks_payload.py b/src/huggingface_hub/_webhooks_payload.py index 288f4b08b9..90f12425cb 100644 --- a/src/huggingface_hub/_webhooks_payload.py +++ b/src/huggingface_hub/_webhooks_payload.py @@ -14,7 +14,7 @@ # limitations under the License. """Contains data structures to parse the webhooks payload.""" -from typing import List, Literal, Optional +from typing import Literal, Optional from .utils import is_pydantic_available @@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId): name: str private: bool subdomain: Optional[str] = None - tags: Optional[List[str]] = None + tags: Optional[list[str]] = None type: Literal["dataset", "model", "space"] url: WebhookPayloadUrl @@ -134,4 +134,4 @@ class WebhookPayload(BaseModel): comment: Optional[WebhookPayloadComment] = None webhook: WebhookPayloadWebhook movedTo: Optional[WebhookPayloadMovedTo] = None - updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None + updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None diff --git a/src/huggingface_hub/_webhooks_server.py b/src/huggingface_hub/_webhooks_server.py index b1a89c37cd..6c761fd977 100644 --- a/src/huggingface_hub/_webhooks_server.py +++ b/src/huggingface_hub/_webhooks_server.py @@ -18,7 +18,7 @@ import inspect import os from functools import wraps -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional from .utils import experimental, is_fastapi_available, is_gradio_available @@ -109,7 +109,7 @@ def __init__( self._ui = ui self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET") - self.registered_webhooks: Dict[str, Callable] = {} + self.registered_webhooks: dict[str, Callable] = {} _warn_on_empty_secret(self.webhook_secret) def add_webhook(self, path: Optional[str] = None) -> Callable: diff --git a/src/huggingface_hub/cli/__init__.py b/src/huggingface_hub/cli/__init__.py index 7a1a8d793b..8568c82be1 100644 --- a/src/huggingface_hub/cli/__init__.py +++ b/src/huggingface_hub/cli/__init__.py @@ -11,17 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - -from abc import ABC, abstractmethod -from argparse import _SubParsersAction - - -class BaseHuggingfaceCLICommand(ABC): - @staticmethod - @abstractmethod - def register_subcommand(parser: _SubParsersAction): - raise NotImplementedError() - - @abstractmethod - def run(self): - raise NotImplementedError() diff --git a/src/huggingface_hub/cli/_cli_utils.py b/src/huggingface_hub/cli/_cli_utils.py index bd56ad6896..de34f1973c 100644 --- a/src/huggingface_hub/cli/_cli_utils.py +++ b/src/huggingface_hub/cli/_cli_utils.py @@ -11,59 +11,163 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Contains a utility for good-looking prints.""" +"""Contains CLI utilities (styling, helpers).""" +import importlib.metadata import os -from typing import List, Union +import time +from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Optional +import click +import typer -class ANSI: +from huggingface_hub import __version__, constants +from huggingface_hub.utils import ANSI, get_session, hf_raise_for_status, installation_method, logging + + +logger = logging.get_logger() + + +if TYPE_CHECKING: + from huggingface_hub.hf_api import HfApi + + +def get_hf_api(token: Optional[str] = None) -> "HfApi": + # Import here to avoid circular import + from huggingface_hub.hf_api import HfApi + + return HfApi(token=token, library_name="hf", library_version=__version__) + + +#### TYPER UTILS + + +class AlphabeticalMixedGroup(typer.core.TyperGroup): """ - Helper for en.wikipedia.org/wiki/ANSI_escape_code + Typer Group that lists commands and sub-apps mixed and alphabetically. """ - _bold = "\u001b[1m" - _gray = "\u001b[90m" - _red = "\u001b[31m" - _reset = "\u001b[0m" - _yellow = "\u001b[33m" + def list_commands(self, ctx: click.Context) -> list[str]: # type: ignore[name-defined] + # click.Group stores both commands and sub-groups in `self.commands` + return sorted(self.commands.keys()) + + +def typer_factory(help: str) -> typer.Typer: + return typer.Typer( + help=help, + add_completion=True, + no_args_is_help=True, + cls=AlphabeticalMixedGroup, + # Disable rich completely for consistent experience + rich_markup_mode=None, + rich_help_panel=None, + pretty_exceptions_enable=False, + ) + - @classmethod - def bold(cls, s: str) -> str: - return cls._format(s, cls._bold) +class RepoType(str, Enum): + model = "model" + dataset = "dataset" + space = "space" - @classmethod - def gray(cls, s: str) -> str: - return cls._format(s, cls._gray) - @classmethod - def red(cls, s: str) -> str: - return cls._format(s, cls._bold + cls._red) +RepoIdArg = Annotated[ + str, + typer.Argument( + help="The ID of the repo (e.g. 
`username/repo-name`).", + ), +] - @classmethod - def yellow(cls, s: str) -> str: - return cls._format(s, cls._yellow) - @classmethod - def _format(cls, s: str, code: str) -> str: - if os.environ.get("NO_COLOR"): - # See https://no-color.org/ - return s - return f"{code}{s}{cls._reset}" +RepoTypeOpt = Annotated[ + RepoType, + typer.Option( + help="The type of repository (model, dataset, or space).", + ), +] +TokenOpt = Annotated[ + Optional[str], + typer.Option( + help="A User Access Token generated from https://huggingface.co/settings/tokens.", + ), +] -def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: +PrivateOpt = Annotated[ + bool, + typer.Option( + help="Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists.", + ), +] + +RevisionOpt = Annotated[ + Optional[str], + typer.Option( + help="Git revision id which can be a branch name, a tag, or a commit hash.", + ), +] + + +### PyPI VERSION CHECKER + + +def check_cli_update() -> None: """ - Inspired by: + Check whether a newer version of `huggingface_hub` is available on PyPI. + + If a newer version is found, notify the user and suggest updating. + If current version is a pre-release (e.g. `1.0.0.rc1`), or a dev version (e.g. `1.0.0.dev1`), no check is performed. - - stackoverflow.com/a/8356620/593036 - - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data + This function is called at the entry point of the CLI. It only performs the check once every 24 hours, and any error + during the check is caught and logged, to avoid breaking the CLI. """ - col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] - row_format = ("{{:{}}} " * len(headers)).format(*col_widths) - lines = [] - lines.append(row_format.format(*headers)) - lines.append(row_format.format(*["-" * w for w in col_widths])) - for row in rows: - lines.append(row_format.format(*row)) - return "\n".join(lines) + try: + _check_cli_update() + except Exception: + # We don't want the CLI to fail on version checks, no matter the reason. 
+ logger.debug("Error while checking for CLI update.", exc_info=True) + + +def _check_cli_update() -> None: + current_version = importlib.metadata.version("huggingface_hub") + + # Skip if current version is a pre-release or dev version + if any(tag in current_version for tag in ["rc", "dev"]): + return + + # Skip if already checked in the last 24 hours + if os.path.exists(constants.CHECK_FOR_UPDATE_DONE_PATH): + mtime = os.path.getmtime(constants.CHECK_FOR_UPDATE_DONE_PATH) + if (time.time() - mtime) < 24 * 3600: + return + + # Touch the file to mark that we did the check now + Path(constants.CHECK_FOR_UPDATE_DONE_PATH).touch() + + # Check latest version from PyPI + response = get_session().get("https://pypi.org/pypi/huggingface_hub/json", timeout=2) + hf_raise_for_status(response) + data = response.json() + latest_version = data["info"]["version"] + + # If latest version is different from current, notify user + if current_version != latest_version: + method = installation_method() + if method == "brew": + update_command = "brew upgrade huggingface-cli" + elif method == "hf_installer" and os.name == "nt": + update_command = 'powershell -NoProfile -Command "iwr -useb https://hf.co/cli/install.ps1 | iex"' + elif method == "hf_installer": + update_command = "curl -LsSf https://hf.co/cli/install.sh | sh -" + else: # unknown => likely pip + update_command = "pip install -U huggingface_hub" + + click.echo( + ANSI.yellow( + f"A new version of huggingface_hub ({latest_version}) is available! " + f"You are using version {current_version}.\n" + f"To update, run: {ANSI.bold(update_command)}\n", + ) + ) diff --git a/src/huggingface_hub/cli/auth.py b/src/huggingface_hub/cli/auth.py index bbf475a4f8..cb522c918c 100644 --- a/src/huggingface_hub/cli/auth.py +++ b/src/huggingface_hub/cli/auth.py @@ -30,18 +30,17 @@ hf auth whoami """ -from argparse import _SubParsersAction -from typing import List, Optional +from typing import Annotated, Optional -from requests.exceptions import HTTPError +import typer -from huggingface_hub.commands import BaseHuggingfaceCLICommand from huggingface_hub.constants import ENDPOINT -from huggingface_hub.hf_api import HfApi +from huggingface_hub.errors import HfHubHTTPError +from huggingface_hub.hf_api import whoami from .._login import auth_list, auth_switch, login, logout -from ..utils import get_stored_tokens, get_token, logging -from ._cli_utils import ANSI +from ..utils import ANSI, get_stored_tokens, get_token, logging +from ._cli_utils import TokenOpt, typer_factory logger = logging.get_logger(__name__) @@ -55,125 +54,42 @@ _inquirer_py_available = False -class AuthCommands(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - # Create the main 'auth' command - auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).") - auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands") - - # Show help if no subcommand is provided - auth_parser.set_defaults(func=lambda args: auth_parser.print_help()) - - # Add 'login' as a subcommand of 'auth' - login_parser = auth_subparsers.add_parser( - "login", help="Log in using a token from huggingface.co/settings/tokens" - ) - login_parser.add_argument( - "--token", - type=str, - help="Token generated from https://huggingface.co/settings/tokens", - ) - login_parser.add_argument( - "--add-to-git-credential", - action="store_true", - help="Optional: Save token to git credential helper.", - ) - login_parser.set_defaults(func=lambda args: 
AuthLogin(args)) - - # Add 'logout' as a subcommand of 'auth' - logout_parser = auth_subparsers.add_parser("logout", help="Log out") - logout_parser.add_argument( - "--token-name", - type=str, - help="Optional: Name of the access token to log out from.", - ) - logout_parser.set_defaults(func=lambda args: AuthLogout(args)) - - # Add 'whoami' as a subcommand of 'auth' - whoami_parser = auth_subparsers.add_parser( - "whoami", help="Find out which huggingface.co account you are logged in as." - ) - whoami_parser.set_defaults(func=lambda args: AuthWhoami(args)) - - # Existing subcommands - auth_switch_parser = auth_subparsers.add_parser("switch", help="Switch between access tokens") - auth_switch_parser.add_argument( - "--token-name", - type=str, - help="Optional: Name of the access token to switch to.", - ) - auth_switch_parser.add_argument( - "--add-to-git-credential", - action="store_true", - help="Optional: Save token to git credential helper.", - ) - auth_switch_parser.set_defaults(func=lambda args: AuthSwitch(args)) - - auth_list_parser = auth_subparsers.add_parser("list", help="List all stored access tokens") - auth_list_parser.set_defaults(func=lambda args: AuthList(args)) - - -class BaseAuthCommand: - def __init__(self, args): - self.args = args - self._api = HfApi() - - -class AuthLogin(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - login( - token=self.args.token, - add_to_git_credential=self.args.add_to_git_credential, - ) - - -class AuthLogout(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - logout(token_name=self.args.token_name) - - -class AuthSwitch(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - token_name = self.args.token_name - if token_name is None: - token_name = self._select_token_name() - - if token_name is None: - print("No token name provided. Aborting.") - exit() - auth_switch(token_name, add_to_git_credential=self.args.add_to_git_credential) - - def _select_token_name(self) -> Optional[str]: - token_names = list(get_stored_tokens().keys()) - - if not token_names: - logger.error("No stored tokens found. Please login first.") - return None +auth_cli = typer_factory(help="Manage authentication (login, logout, etc.).") + + +@auth_cli.command("login", help="Login using a token from huggingface.co/settings/tokens") +def auth_login( + token: TokenOpt = None, + add_to_git_credential: Annotated[ + bool, + typer.Option( + help="Save to git credential helper. Useful only if you plan to run git commands directly.", + ), + ] = False, +) -> None: + login(token=token, add_to_git_credential=add_to_git_credential) + + +@auth_cli.command("logout", help="Logout from a specific token") +def auth_logout( + token_name: Annotated[ + Optional[str], + typer.Option( + help="Name of token to logout", + ), + ] = None, +) -> None: + logout(token_name=token_name) + - if _inquirer_py_available: - return self._select_token_name_tui(token_names) - # if inquirer is not available, use a simpler terminal UI - print("Available stored tokens:") - for i, token_name in enumerate(token_names, 1): - print(f"{i}. {token_name}") - while True: - try: - choice = input("Enter the number of the token to switch to (or 'q' to quit): ") - if choice.lower() == "q": - return None - index = int(choice) - 1 - if 0 <= index < len(token_names): - return token_names[index] - else: - print("Invalid selection. Please try again.") - except ValueError: - print("Invalid input. 
Please enter a number or 'q' to quit.") - - def _select_token_name_tui(self, token_names: List[str]) -> Optional[str]: +def _select_token_name() -> Optional[str]: + token_names = list(get_stored_tokens().keys()) + + if not token_names: + logger.error("No stored tokens found. Please login first.") + return None + + if _inquirer_py_available: choices = [Choice(token_name, name=token_name) for token_name in token_names] try: return inquirer.select( @@ -184,30 +100,68 @@ def _select_token_name_tui(self, token_names: List[str]) -> Optional[str]: except KeyboardInterrupt: logger.info("Token selection cancelled.") return None - - -class AuthList(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - auth_list() - - -class AuthWhoami(BaseAuthCommand): - def run(self): - token = get_token() - if token is None: - print("Not logged in") - exit() + # if inquirer is not available, use a simpler terminal UI + print("Available stored tokens:") + for i, token_name in enumerate(token_names, 1): + print(f"{i}. {token_name}") + while True: try: - info = self._api.whoami(token) - print(ANSI.bold("user: "), info["name"]) - orgs = [org["name"] for org in info["orgs"]] - if orgs: - print(ANSI.bold("orgs: "), ",".join(orgs)) - - if ENDPOINT != "https://huggingface.co": - print(f"Authenticated through private endpoint: {ENDPOINT}") - except HTTPError as e: - print(e) - print(ANSI.red(e.response.text)) - exit(1) + choice = input("Enter the number of the token to switch to (or 'q' to quit): ") + if choice.lower() == "q": + return None + index = int(choice) - 1 + if 0 <= index < len(token_names): + return token_names[index] + else: + print("Invalid selection. Please try again.") + except ValueError: + print("Invalid input. Please enter a number or 'q' to quit.") + + +@auth_cli.command("switch", help="Switch between access tokens") +def auth_switch_cmd( + token_name: Annotated[ + Optional[str], + typer.Option( + help="Name of the token to switch to", + ), + ] = None, + add_to_git_credential: Annotated[ + bool, + typer.Option( + help="Save to git credential helper. Useful only if you plan to run git commands directly.", + ), + ] = False, +) -> None: + if token_name is None: + token_name = _select_token_name() + if token_name is None: + print("No token name provided. 
Aborting.") + raise typer.Exit() + auth_switch(token_name, add_to_git_credential=add_to_git_credential) + + +@auth_cli.command("list", help="List all stored access tokens") +def auth_list_cmd() -> None: + auth_list() + + +@auth_cli.command("whoami", help="Find out which huggingface.co account you are logged in as.") +def auth_whoami() -> None: + token = get_token() + if token is None: + print("Not logged in") + raise typer.Exit() + try: + info = whoami(token) + print(ANSI.bold("user: "), info["name"]) + orgs = [org["name"] for org in info["orgs"]] + if orgs: + print(ANSI.bold("orgs: "), ",".join(orgs)) + + if ENDPOINT != "https://huggingface.co": + print(f"Authenticated through private endpoint: {ENDPOINT}") + except HfHubHTTPError as e: + print(e) + print(ANSI.red(e.response.text)) + raise typer.Exit(code=1) diff --git a/src/huggingface_hub/cli/cache.py b/src/huggingface_hub/cli/cache.py index cc36ef5efd..35f7540821 100644 --- a/src/huggingface_hub/cli/cache.py +++ b/src/huggingface_hub/cli/cache.py @@ -16,14 +16,15 @@ import os import time -from argparse import Namespace, _SubParsersAction +from enum import Enum from functools import wraps from tempfile import mkstemp -from typing import Any, Callable, Iterable, List, Literal, Optional, Union +from typing import Annotated, Any, Callable, Iterable, Optional, Union -from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir -from . import BaseHuggingfaceCLICommand -from ._cli_utils import ANSI, tabulate +import typer + +from ..utils import ANSI, CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir, tabulate +from ._cli_utils import typer_factory # --- DELETE helpers (from delete_cache.py) --- @@ -36,10 +37,16 @@ except ImportError: _inquirer_py_available = False -SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"] _CANCEL_DELETION_STR = "CANCEL_DELETION" +class SortingOption(str, Enum): + alphabetical = "alphabetical" + lastUpdated = "lastUpdated" + lastUsed = "lastUsed" + size = "size" + + def require_inquirer_py(fn: Callable) -> Callable: @wraps(fn) def _inner(*args, **kwargs): @@ -54,122 +61,93 @@ def _inner(*args, **kwargs): return _inner -class CacheCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - cache_parser = parser.add_parser("cache", help="Manage local cache directory.") - cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands") +cache_cli = typer_factory(help="Manage local cache directory.") - # Show help if no subcommand is provided - cache_parser.set_defaults(func=lambda args: cache_parser.print_help()) - # Scan subcommand - scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.") - scan_parser.add_argument( - "--dir", - type=str, - default=None, - help="cache directory to scan (optional). 
Default to the default HuggingFace cache.", - ) - scan_parser.add_argument( +@cache_cli.command("scan", help="Scan the cache directory") +def cache_scan( + dir: Annotated[ + Optional[str], + typer.Option( + help="Cache directory to scan (defaults to Hugging Face cache).", + ), + ] = None, + verbose: Annotated[ + int, + typer.Option( "-v", "--verbose", - action="count", - default=0, - help="show a more verbose output", - ) - scan_parser.set_defaults(func=CacheCommand, cache_command="scan") - # Delete subcommand - delete_parser = cache_subparsers.add_parser("delete", help="Delete revisions from the cache directory.") - delete_parser.add_argument( - "--dir", - type=str, - default=None, - help="cache directory (optional). Default to the default HuggingFace cache.", - ) - delete_parser.add_argument( - "--disable-tui", - action="store_true", - help=( - "Disable Terminal User Interface (TUI) mode. Useful if your platform/terminal doesn't support the multiselect menu." - ), - ) - delete_parser.add_argument( - "--sort", - nargs="?", - choices=["alphabetical", "lastUpdated", "lastUsed", "size"], - help=( - "Sort repositories by the specified criteria. Options: " - "'alphabetical' (A-Z), " - "'lastUpdated' (newest first), " - "'lastUsed' (most recent first), " - "'size' (largest first)." - ), - ) - delete_parser.set_defaults(func=CacheCommand, cache_command="delete") - - def __init__(self, args: Namespace) -> None: - self.args = args - self.verbosity: int = getattr(args, "verbose", 0) - self.cache_dir: Optional[str] = getattr(args, "dir", None) - self.disable_tui: bool = getattr(args, "disable_tui", False) - self.sort_by: Optional[SortingOption_T] = getattr(args, "sort", None) - self.cache_command: Optional[str] = getattr(args, "cache_command", None) - - def run(self): - if self.cache_command == "scan": - self._run_scan() - elif self.cache_command == "delete": - self._run_delete() + count=True, + help="Increase verbosity (-v, -vv, -vvv).", + ), + ] = 0, +) -> None: + try: + t0 = time.time() + hf_cache_info = scan_cache_dir(dir) + t1 = time.time() + except CacheNotFound as exc: + print(f"Cache directory not found: {str(exc.cache_dir)}") + return + print(get_table(hf_cache_info, verbosity=verbose)) + print( + f"\nDone in {round(t1 - t0, 1)}s. Scanned {len(hf_cache_info.repos)} repo(s)" + f" for a total of {ANSI.red(hf_cache_info.size_on_disk_str)}." + ) + if len(hf_cache_info.warnings) > 0: + message = f"Got {len(hf_cache_info.warnings)} warning(s) while scanning." + if verbose >= 3: + print(ANSI.gray(message)) + for warning in hf_cache_info.warnings: + print(ANSI.gray(str(warning))) else: - print("Please specify a cache subcommand (scan or delete). Use -h for help.") - - def _run_scan(self): - try: - t0 = time.time() - hf_cache_info = scan_cache_dir(self.cache_dir) - t1 = time.time() - except CacheNotFound as exc: - cache_dir = exc.cache_dir - print(f"Cache directory not found: {cache_dir}") - return - print(get_table(hf_cache_info, verbosity=self.verbosity)) - print( - f"\nDone in {round(t1 - t0, 1)}s. Scanned {len(hf_cache_info.repos)} repo(s)" - f" for a total of {ANSI.red(hf_cache_info.size_on_disk_str)}." - ) - if len(hf_cache_info.warnings) > 0: - message = f"Got {len(hf_cache_info.warnings)} warning(s) while scanning." 
- if self.verbosity >= 3: - print(ANSI.gray(message)) - for warning in hf_cache_info.warnings: - print(ANSI.gray(str(warning))) - else: - print(ANSI.gray(message + " Use -vvv to print details.")) - - def _run_delete(self): - hf_cache_info = scan_cache_dir(self.cache_dir) - if self.disable_tui: - selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) + print(ANSI.gray(message + " Use -vvv to print details.")) + + +@cache_cli.command("delete", help="Delete revisions from the cache directory") +def cache_delete( + dir: Annotated[ + Optional[str], + typer.Option( + help="Cache directory (defaults to Hugging Face cache).", + ), + ] = None, + disable_tui: Annotated[ + bool, + typer.Option( + help="Disable Terminal User Interface (TUI) mode. Useful if your platform/terminal doesn't support the multiselect menu.", + ), + ] = False, + sort: Annotated[ + Optional[SortingOption], + typer.Option( + help="Sort repositories by the specified criteria. Options: 'alphabetical' (A-Z), 'lastUpdated' (newest first), 'lastUsed' (most recent first), 'size' (largest first).", + ), + ] = None, +) -> None: + hf_cache_info = scan_cache_dir(dir) + sort_by = sort.value if sort is not None else None + if disable_tui: + selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=sort_by) + else: + selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=sort_by) + if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes: + confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion ?" + if disable_tui: + confirmed = _ask_for_confirmation_no_tui(confirm_message) else: - selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) - if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes: - confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion ?" - if self.disable_tui: - confirmed = _ask_for_confirmation_no_tui(confirm_message) - else: - confirmed = _ask_for_confirmation_tui(confirm_message) - if confirmed: - strategy = hf_cache_info.delete_revisions(*selected_hashes) - print("Start deletion.") - strategy.execute() - print( - f"Done. Deleted {len(strategy.repos)} repo(s) and" - f" {len(strategy.snapshots)} revision(s) for a total of" - f" {strategy.expected_freed_size_str}." - ) - return - print("Deletion is cancelled. Do nothing.") + confirmed = _ask_for_confirmation_tui(confirm_message) + if confirmed: + strategy = hf_cache_info.delete_revisions(*selected_hashes) + print("Start deletion.") + strategy.execute() + print( + f"Done. Deleted {len(strategy.repos)} repo(s) and" + f" {len(strategy.snapshots)} revision(s) for a total of" + f" {strategy.expected_freed_size_str}." + ) + return + print("Deletion is cancelled. 
Do nothing.") def get_table(hf_cache_info: HFCacheInfo, *, verbosity: int = 0) -> str: @@ -228,7 +206,7 @@ def get_table(hf_cache_info: HFCacheInfo, *, verbosity: int = 0) -> str: ) -def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T] = None): +def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[str] = None): if sort_by == "alphabetical": return (repo.repo_type, repo.repo_id.lower()) elif sort_by == "lastUpdated": @@ -242,9 +220,7 @@ def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_ @require_inquirer_py -def _manual_review_tui( - hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List[str]: +def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[str] = None) -> list[str]: choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected, sort_by=sort_by) checkbox = inquirer.checkbox( message="Select revisions to delete:", @@ -277,9 +253,9 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool: def _get_tui_choices_from_scan( - repos: Iterable[CachedRepoInfo], preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List: - choices: List[Union["Choice", "Separator"]] = [] + repos: Iterable[CachedRepoInfo], preselected: list[str], sort_by: Optional[str] = None +) -> list: + choices: list[Union["Choice", "Separator"]] = [] choices.append( Choice( _CANCEL_DELETION_STR, name="None of the following (if selected, nothing will be deleted).", enabled=False @@ -306,8 +282,8 @@ def _get_tui_choices_from_scan( def _manual_review_no_tui( - hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List[str]: + hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[str] = None +) -> list[str]: fd, tmp_path = mkstemp(suffix=".txt") os.close(fd) lines = [] @@ -358,14 +334,14 @@ def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool: print(f"Invalid input. Must be one of {ALL}") -def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str: +def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: list[str]) -> str: if _CANCEL_DELETION_STR in selected_hashes: return "Nothing will be deleted." strategy = hf_cache_info.delete_revisions(*selected_hashes) return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}." 
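# --- Editorial aside (not part of the patch) ---------------------------------
# The `hf cache delete` flow refactored above is built on public APIs and can
# also be driven programmatically. A minimal sketch; `execute()` is left
# commented out because deletion is irreversible:
#
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()  # defaults to the Hugging Face cache directory
# Take a single revision hash as an example selection:
selected = [rev.commit_hash for repo in cache_info.repos for rev in repo.revisions][:1]
strategy = cache_info.delete_revisions(*selected)  # plan only, nothing deleted yet
print(f"Would free {strategy.expected_freed_size_str}")
# strategy.execute()  # uncomment to actually delete the selected revisions
# ------------------------------------------------------------------------------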
-def _read_manual_review_tmp_file(tmp_path: str) -> List[str]: +def _read_manual_review_tmp_file(tmp_path: str) -> list[str]: with open(tmp_path) as f: content = f.read() lines = [line.strip() for line in content.split("\n")] diff --git a/src/huggingface_hub/cli/download.py b/src/huggingface_hub/cli/download.py index 3e59233da1..0a80f0caed 100644 --- a/src/huggingface_hub/cli/download.py +++ b/src/huggingface_hub/cli/download.py @@ -37,145 +37,150 @@ """ import warnings -from argparse import Namespace, _SubParsersAction -from typing import List, Optional +from typing import Annotated, Optional, Union + +import typer from huggingface_hub import logging from huggingface_hub._snapshot_download import snapshot_download -from huggingface_hub.commands import BaseHuggingfaceCLICommand -from huggingface_hub.file_download import hf_hub_download -from huggingface_hub.utils import disable_progress_bars, enable_progress_bars - - -logger = logging.get_logger(__name__) - - -class DownloadCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - download_parser = parser.add_parser("download", help="Download files from the Hub") - download_parser.add_argument( - "repo_id", type=str, help="ID of the repo to download from (e.g. `username/repo-name`)." - ) - download_parser.add_argument( - "filenames", type=str, nargs="*", help="Files to download (e.g. `config.json`, `data/metadata.jsonl`)." - ) - download_parser.add_argument( - "--repo-type", - choices=["model", "dataset", "space"], - default="model", - help="Type of repo to download from (defaults to 'model').", - ) - download_parser.add_argument( - "--revision", - type=str, - help="An optional Git revision id which can be a branch name, a tag, or a commit hash.", - ) - download_parser.add_argument( - "--include", nargs="*", type=str, help="Glob patterns to match files to download." - ) - download_parser.add_argument( - "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to download." - ) - download_parser.add_argument( - "--cache-dir", type=str, help="Path to the directory where to save the downloaded files." - ) - download_parser.add_argument( - "--local-dir", - type=str, - help=( - "If set, the downloaded file will be placed under this directory. Check out" - " https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more" - " details." - ), - ) - download_parser.add_argument( - "--force-download", - action="store_true", +from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download +from huggingface_hub.utils import _format_size, disable_progress_bars, enable_progress_bars, tabulate + +from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt + + +def download( + repo_id: RepoIdArg, + filenames: Annotated[ + Optional[list[str]], + typer.Argument( + help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).", + ), + ] = None, + repo_type: RepoTypeOpt = RepoTypeOpt.model, + revision: RevisionOpt = None, + include: Annotated[ + Optional[list[str]], + typer.Option( + help="Glob patterns to include from files to download. 
e.g. *.json",
+        ),
+    ] = None,
+    exclude: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to exclude from files to download.",
+        ),
+    ] = None,
+    cache_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Directory where to save files.",
+        ),
+    ] = None,
+    local_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more details.",
+        ),
+    ] = None,
+    force_download: Annotated[
+        bool,
+        typer.Option(
             help="If True, the files will be downloaded even if they are already cached.",
-        )
-        download_parser.add_argument(
-            "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
-        )
-        download_parser.add_argument(
-            "--quiet",
-            action="store_true",
+        ),
+    ] = False,
+    dry_run: Annotated[
+        bool,
+        typer.Option(
+            help="If True, perform a dry run without actually downloading the files.",
+        ),
+    ] = False,
+    token: TokenOpt = None,
+    quiet: Annotated[
+        bool,
+        typer.Option(
             help="If True, progress bars are disabled and only the path to the download files is printed.",
-        )
-        download_parser.add_argument(
-            "--max-workers",
-            type=int,
-            default=8,
+        ),
+    ] = False,
+    max_workers: Annotated[
+        int,
+        typer.Option(
             help="Maximum number of workers to use for downloading files. Default is 8.",
-        )
-        download_parser.set_defaults(func=DownloadCommand)
-
-    def __init__(self, args: Namespace) -> None:
-        self.token = args.token
-        self.repo_id: str = args.repo_id
-        self.filenames: List[str] = args.filenames
-        self.repo_type: str = args.repo_type
-        self.revision: Optional[str] = args.revision
-        self.include: Optional[List[str]] = args.include
-        self.exclude: Optional[List[str]] = args.exclude
-        self.cache_dir: Optional[str] = args.cache_dir
-        self.local_dir: Optional[str] = args.local_dir
-        self.force_download: bool = args.force_download
-        self.quiet: bool = args.quiet
-        self.max_workers: int = args.max_workers
-
-    def run(self) -> None:
-        if self.quiet:
-            disable_progress_bars()
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                print(self._download())  # Print path to downloaded files
-            enable_progress_bars()
-        else:
-            logging.set_verbosity_info()
-            print(self._download())  # Print path to downloaded files
-            logging.set_verbosity_warning()
+        ),
+    ] = 8,
+) -> None:
+    """Download files from the Hub."""

-    def _download(self) -> str:
+    def run_download() -> Union[str, DryRunFileInfo, list[DryRunFileInfo]]:
+        filenames_list = filenames if filenames is not None else []
         # Warn user if patterns are ignored
-        if len(self.filenames) > 0:
-            if self.include is not None and len(self.include) > 0:
+        if len(filenames_list) > 0:
+            if include is not None and len(include) > 0:
                 warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
-            if self.exclude is not None and len(self.exclude) > 0:
+            if exclude is not None and len(exclude) > 0:
                 warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")

         # Single file to download: use `hf_hub_download`
-        if len(self.filenames) == 1:
+        if len(filenames_list) == 1:
             return hf_hub_download(
-                repo_id=self.repo_id,
-                repo_type=self.repo_type,
-                revision=self.revision,
-                filename=self.filenames[0],
-                cache_dir=self.cache_dir,
-                force_download=self.force_download,
-                token=self.token,
-                local_dir=self.local_dir,
+                repo_id=repo_id,
+                repo_type=repo_type.value,
+                revision=revision,
+
filename=filenames_list[0], + cache_dir=cache_dir, + force_download=force_download, + token=token, + local_dir=local_dir, library_name="hf", + dry_run=dry_run, ) # Otherwise: use `snapshot_download` to ensure all files comes from same revision - elif len(self.filenames) == 0: - allow_patterns = self.include - ignore_patterns = self.exclude + if len(filenames_list) == 0: + allow_patterns = include + ignore_patterns = exclude else: - allow_patterns = self.filenames + allow_patterns = filenames_list ignore_patterns = None return snapshot_download( - repo_id=self.repo_id, - repo_type=self.repo_type, - revision=self.revision, + repo_id=repo_id, + repo_type=repo_type.value, + revision=revision, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, - force_download=self.force_download, - cache_dir=self.cache_dir, - token=self.token, - local_dir=self.local_dir, + force_download=force_download, + cache_dir=cache_dir, + token=token, + local_dir=local_dir, library_name="hf", - max_workers=self.max_workers, + max_workers=max_workers, + dry_run=dry_run, + ) + + def _print_result(result: Union[str, DryRunFileInfo, list[DryRunFileInfo]]) -> None: + if isinstance(result, str): + print(result) + return + + # Print dry run info + if isinstance(result, DryRunFileInfo): + result = [result] + print( + f"[dry-run] Will download {len([r for r in result if r.will_download])} files (out of {len(result)}) totalling {_format_size(sum(r.file_size for r in result if r.will_download))}." ) + columns = ["File", "Bytes to download"] + items: list[list[Union[str, int]]] = [] + for info in sorted(result, key=lambda x: x.filename): + items.append([info.filename, _format_size(info.file_size) if info.will_download else "-"]) + print(tabulate(items, headers=columns)) + + if quiet: + disable_progress_bars() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + _print_result(run_download()) + enable_progress_bars() + else: + _print_result(run_download()) + logging.set_verbosity_warning() diff --git a/src/huggingface_hub/cli/hf.py b/src/huggingface_hub/cli/hf.py index 2587918b29..ce9205a6b8 100644 --- a/src/huggingface_hub/cli/hf.py +++ b/src/huggingface_hub/cli/hf.py @@ -12,51 +12,51 @@ # See the License for the specific language governing permissions and # limitations under the License. 
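# --- Editorial aside (not part of the patch) ---------------------------------
# What `_print_result` above renders for `hf download <repo_id> --dry-run`,
# reconstructed from the hunk's format strings and the `tabulate` helper; the
# file names and sizes below are illustrative, not captured output:
#
#     [dry-run] Will download 2 files (out of 3) totalling 550.2M.
#     File              Bytes to download
#     ----------------- -----------------
#     config.json       -
#     model.safetensors 548.1M
#     tokenizer.json    2.1M
# ------------------------------------------------------------------------------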
-from argparse import ArgumentParser -from huggingface_hub.cli.auth import AuthCommands -from huggingface_hub.cli.cache import CacheCommand -from huggingface_hub.cli.download import DownloadCommand -from huggingface_hub.cli.jobs import JobsCommands -from huggingface_hub.cli.lfs import LfsCommands -from huggingface_hub.cli.repo import RepoCommands -from huggingface_hub.cli.repo_files import RepoFilesCommand -from huggingface_hub.cli.system import EnvironmentCommand, VersionCommand -from huggingface_hub.cli.upload import UploadCommand -from huggingface_hub.cli.upload_large_folder import UploadLargeFolderCommand +from huggingface_hub.cli._cli_utils import check_cli_update, typer_factory +from huggingface_hub.cli.auth import auth_cli +from huggingface_hub.cli.cache import cache_cli +from huggingface_hub.cli.download import download +from huggingface_hub.cli.inference_endpoints import app as inference_endpoints_cli +from huggingface_hub.cli.jobs import jobs_cli +from huggingface_hub.cli.lfs import lfs_enable_largefiles, lfs_multipart_upload +from huggingface_hub.cli.repo import repo_cli +from huggingface_hub.cli.repo_files import repo_files_cli +from huggingface_hub.cli.system import env, version + +# from huggingface_hub.cli.jobs import jobs_app +from huggingface_hub.cli.upload import upload +from huggingface_hub.cli.upload_large_folder import upload_large_folder +from huggingface_hub.utils import logging + + +app = typer_factory(help="Hugging Face Hub CLI") + + +# top level single commands (defined in their respective files) +app.command(help="Download files from the Hub.")(download) +app.command(help="Upload a file or a folder to the Hub.")(upload) +app.command(help="Upload a large folder to the Hub. Recommended for resumable uploads.")(upload_large_folder) +app.command(name="env", help="Print information about the environment.")(env) +app.command(help="Print information about the hf version.")(version) +app.command(help="Configure your repository to enable upload of files > 5GB.", hidden=True)(lfs_enable_largefiles) +app.command(help="Upload large files to the Hub.", hidden=True)(lfs_multipart_upload) + + +# command groups +app.add_typer(auth_cli, name="auth") +app.add_typer(cache_cli, name="cache") +app.add_typer(repo_cli, name="repo") +app.add_typer(repo_files_cli, name="repo-files") +app.add_typer(jobs_cli, name="jobs") +app.add_typer(inference_endpoints_cli, name="endpoints") +app.add_typer(inference_endpoints_cli, name="inference-endpoints", hidden=True) def main(): - parser = ArgumentParser("hf", usage="hf []") - commands_parser = parser.add_subparsers(help="hf command helpers") - - # Register commands - AuthCommands.register_subcommand(commands_parser) - CacheCommand.register_subcommand(commands_parser) - DownloadCommand.register_subcommand(commands_parser) - JobsCommands.register_subcommand(commands_parser) - RepoCommands.register_subcommand(commands_parser) - RepoFilesCommand.register_subcommand(commands_parser) - UploadCommand.register_subcommand(commands_parser) - UploadLargeFolderCommand.register_subcommand(commands_parser) - - # System commands - EnvironmentCommand.register_subcommand(commands_parser) - VersionCommand.register_subcommand(commands_parser) - - # LFS commands (hidden in --help) - LfsCommands.register_subcommand(commands_parser) - - # Let's go - args = parser.parse_args() - if not hasattr(args, "func"): - parser.print_help() - exit(1) - - # Run - service = args.func(args) - if service is not None: - service.run() + logging.set_verbosity_info() + check_cli_update() 
+ app() if __name__ == "__main__": diff --git a/src/huggingface_hub/cli/inference_endpoints.py b/src/huggingface_hub/cli/inference_endpoints.py new file mode 100644 index 0000000000..f0d2f17b7a --- /dev/null +++ b/src/huggingface_hub/cli/inference_endpoints.py @@ -0,0 +1,375 @@ +"""CLI commands for Hugging Face Inference Endpoints.""" + +import json +from typing import Annotated, Optional + +import typer + +from huggingface_hub._inference_endpoints import InferenceEndpoint +from huggingface_hub.errors import HfHubHTTPError + +from ._cli_utils import TokenOpt, get_hf_api, typer_factory + + +app = typer_factory(help="Manage Hugging Face Inference Endpoints.") + +catalog_app = typer_factory(help="Interact with the Inference Endpoints catalog.") + +NameArg = Annotated[ + str, + typer.Argument(help="Endpoint name."), +] + +NamespaceOpt = Annotated[ + Optional[str], + typer.Option( + help="The namespace associated with the Inference Endpoint. Defaults to the current user's namespace.", + ), +] + + +def _print_endpoint(endpoint: InferenceEndpoint) -> None: + typer.echo(json.dumps(endpoint.raw, indent=2, sort_keys=True)) + + +@app.command() +def ls( + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Lists all Inference Endpoints for the given namespace.""" + api = get_hf_api(token=token) + try: + endpoints = api.list_inference_endpoints(namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Listing failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo( + json.dumps( + {"items": [endpoint.raw for endpoint in endpoints]}, + indent=2, + sort_keys=True, + ) + ) + + +@app.command(name="deploy", help="Deploy an Inference Endpoint from a Hub repository.") +def deploy( + name: NameArg, + repo: Annotated[ + str, + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ], + framework: Annotated[ + str, + typer.Option( + help="The machine learning framework used for the model (e.g. 'vllm').", + ), + ], + accelerator: Annotated[ + str, + typer.Option( + help="The hardware accelerator to be used for inference (e.g. 'cpu').", + ), + ], + instance_size: Annotated[ + str, + typer.Option( + help="The size or type of the instance to be used for hosting the model (e.g. 'x4').", + ), + ], + instance_type: Annotated[ + str, + typer.Option( + help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').", + ), + ], + region: Annotated[ + str, + typer.Option( + help="The cloud region in which the Inference Endpoint will be created (e.g. 'us-east-1').", + ), + ], + vendor: Annotated[ + str, + typer.Option( + help="The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. 'aws').", + ), + ], + *, + namespace: NamespaceOpt = None, + task: Annotated[ + Optional[str], + typer.Option( + help="The task on which to deploy the model (e.g. 
'text-classification').", + ), + ] = None, + token: TokenOpt = None, +) -> None: + api = get_hf_api(token=token) + try: + endpoint = api.create_inference_endpoint( + name=name, + repository=repo, + framework=framework, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + region=region, + vendor=vendor, + namespace=namespace, + task=task, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Deployment failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@catalog_app.command(name="deploy", help="Deploy an Inference Endpoint from the Model Catalog.") +def deploy_from_catalog( + name: NameArg, + repo: Annotated[ + str, + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ], + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + api = get_hf_api(token=token) + try: + endpoint = api.create_inference_endpoint_from_catalog( + repo_id=repo, + name=name, + namespace=namespace, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Deployment failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +def list_catalog( + token: TokenOpt = None, +) -> None: + """List available Catalog models.""" + api = get_hf_api(token=token) + try: + models = api.list_inference_catalog(token=token) + except HfHubHTTPError as error: + typer.echo(f"Catalog fetch failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo(json.dumps({"models": models}, indent=2, sort_keys=True)) + + +catalog_app.command(name="ls")(list_catalog) +app.command(name="list-catalog", help="List available Catalog models.", hidden=True)(list_catalog) + + +app.add_typer(catalog_app, name="catalog") + + +@app.command() +def describe( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Get information about an existing endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.get_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Fetch failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@app.command() +def update( + name: NameArg, + namespace: NamespaceOpt = None, + repo: Annotated[ + Optional[str], + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ] = None, + accelerator: Annotated[ + Optional[str], + typer.Option( + help="The hardware accelerator to be used for inference (e.g. 'cpu').", + ), + ] = None, + instance_size: Annotated[ + Optional[str], + typer.Option( + help="The size or type of the instance to be used for hosting the model (e.g. 'x4').", + ), + ] = None, + instance_type: Annotated[ + Optional[str], + typer.Option( + help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').", + ), + ] = None, + framework: Annotated[ + Optional[str], + typer.Option( + help="The machine learning framework used for the model (e.g. 'custom').", + ), + ] = None, + revision: Annotated[ + Optional[str], + typer.Option( + help="The specific model revision to deploy on the Inference Endpoint (e.g. 
'6c0e6080953db56375760c0471a8c5f2929baf11').", + ), + ] = None, + task: Annotated[ + Optional[str], + typer.Option( + help="The task on which to deploy the model (e.g. 'text-classification').", + ), + ] = None, + min_replica: Annotated[ + Optional[int], + typer.Option( + help="The minimum number of replicas (instances) to keep running for the Inference Endpoint.", + ), + ] = None, + max_replica: Annotated[ + Optional[int], + typer.Option( + help="The maximum number of replicas (instances) to scale to for the Inference Endpoint.", + ), + ] = None, + scale_to_zero_timeout: Annotated[ + Optional[int], + typer.Option( + help="The duration in minutes before an inactive endpoint is scaled to zero.", + ), + ] = None, + token: TokenOpt = None, +) -> None: + """Update an existing endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.update_inference_endpoint( + name=name, + namespace=namespace, + repository=repo, + framework=framework, + revision=revision, + task=task, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + min_replica=min_replica, + max_replica=max_replica, + scale_to_zero_timeout=scale_to_zero_timeout, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Update failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + _print_endpoint(endpoint) + + +@app.command() +def delete( + name: NameArg, + namespace: NamespaceOpt = None, + yes: Annotated[ + bool, + typer.Option("--yes", help="Skip confirmation prompts."), + ] = False, + token: TokenOpt = None, +) -> None: + """Delete an Inference Endpoint permanently.""" + if not yes: + confirmation = typer.prompt(f"Delete endpoint '{name}'? Type the name to confirm.") + if confirmation != name: + typer.echo("Aborted.") + raise typer.Exit(code=2) + + api = get_hf_api(token=token) + try: + api.delete_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Delete failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo(f"Deleted '{name}'.") + + +@app.command() +def pause( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Pause an Inference Endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.pause_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Pause failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@app.command() +def resume( + name: NameArg, + namespace: NamespaceOpt = None, + fail_if_already_running: Annotated[ + bool, + typer.Option( + "--fail-if-already-running", + help="If `True`, the method will raise an error if the Inference Endpoint is already running.", + ), + ] = False, + token: TokenOpt = None, +) -> None: + """Resume an Inference Endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.resume_inference_endpoint( + name=name, + namespace=namespace, + token=token, + running_ok=not fail_if_already_running, + ) + except HfHubHTTPError as error: + typer.echo(f"Resume failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + _print_endpoint(endpoint) + + +@app.command() +def scale_to_zero( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Scale an Inference Endpoint to zero.""" + api = get_hf_api(token=token) + try: + endpoint = api.scale_to_zero_inference_endpoint(name=name, 
namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Scale To Zero failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) diff --git a/src/huggingface_hub/cli/jobs.py b/src/huggingface_hub/cli/jobs.py index 3a661c7df7..07363a88a6 100644 --- a/src/huggingface_hub/cli/jobs.py +++ b/src/huggingface_hub/cli/jobs.py @@ -28,1073 +28,742 @@ # Cancel a running job hf jobs cancel + + # Run a UV script + hf jobs uv run