diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml index 6e72641382..830b147805 100644 --- a/.github/conda/meta.yaml +++ b/.github/conda/meta.yaml @@ -16,7 +16,7 @@ requirements: - pip - fsspec - filelock - - requests + - httpx - tqdm - typing-extensions - packaging @@ -26,7 +26,7 @@ requirements: - python - pip - filelock - - requests + - httpx - tqdm - typing-extensions - packaging diff --git a/.github/workflows/check-installers.yml b/.github/workflows/check-installers.yml new file mode 100644 index 0000000000..9a3757e1bc --- /dev/null +++ b/.github/workflows/check-installers.yml @@ -0,0 +1,88 @@ +name: Check CLI installers + +on: + push: + branches: + - main + paths: + - "utils/installers/**" + - ".github/workflows/check-installers.yml" + pull_request: + paths: + - "utils/installers/**" + - ".github/workflows/check-installers.yml" + workflow_dispatch: {} + +permissions: + contents: read + +jobs: + linux-installer: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run installer + shell: bash + run: | + set -euo pipefail + + HF_TEST_ROOT=$(mktemp -d) + INSTALL_DIR="$HF_TEST_ROOT/install" + BIN_DIR="$HF_TEST_ROOT/bin" + + HF_HOME="$INSTALL_DIR" HF_CLI_BIN_DIR="$BIN_DIR" utils/installers/install.sh --no-modify-path + + export PATH="$BIN_DIR:$PATH" + + HF_VERSION_PATH="$HF_TEST_ROOT/hf-version.txt" + hf version | tee "$HF_VERSION_PATH" + if ! grep -Eq 'huggingface_hub version: [0-9]+(\.[0-9]+){1,2}' "$HF_VERSION_PATH"; then + echo "hf version output missing huggingface_hub version" >&2 + cat "$HF_VERSION_PATH" >&2 + exit 1 + fi + + NO_COLOR=1 hf --help + + rm -rf "$HF_TEST_ROOT" + + windows-installer: + runs-on: windows-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run installer + shell: pwsh + run: | + $hfTestRoot = Join-Path $env:TEMP ([System.Guid]::NewGuid().ToString()) + $installDir = Join-Path $hfTestRoot 'install' + $binDir = Join-Path $hfTestRoot 'bin' + New-Item -ItemType Directory -Path $installDir -Force | Out-Null + New-Item -ItemType Directory -Path $binDir -Force | Out-Null + + $env:HF_HOME = $installDir + $env:HF_CLI_BIN_DIR = $binDir + & "$PWD/utils/installers/install.ps1" -NoModifyPath + + $env:PATH = "$binDir;$env:PATH" + + $hfVersionPath = Join-Path $hfTestRoot 'hf-version.txt' + & hf.exe version | Tee-Object -FilePath $hfVersionPath + if ($LASTEXITCODE -ne 0) { + throw 'hf version failed' + } + if (-not (Select-String -Path $hfVersionPath -Pattern 'huggingface_hub version: [0-9]+(\.[0-9]+){1,2}')) { + throw 'hf version output missing huggingface_hub version' + } + + $env:NO_COLOR = '1' + & hf.exe --help + if ($LASTEXITCODE -ne 0) { + throw 'hf --help failed' + } + Remove-Item Env:NO_COLOR + + Remove-Item -Path $hfTestRoot -Recurse -Force diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml deleted file mode 100644 index df663ce975..0000000000 --- a/.github/workflows/contrib-tests.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Contrib tests - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * 6' # Run once a week, Saturday midnight - push: - branches: - - ci_contrib_* - pull_request: - types: [assigned, opened, synchronize, reopened] - paths: - - contrib/** - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - contrib: [ - "sentence_transformers", - "spacy", - "timm", - ] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - 
- # Install pip - - name: Install pip - run: pip install --upgrade pip - - # Install downstream library and its specific dependencies - - name: Install ${{ matrix.contrib }} - run: pip install -r contrib/${{ matrix.contrib }}/requirements.txt - - # Install huggingface_hub from source code + testing extras - - name: Install `huggingface_hub` - run: | - pip uninstall -y huggingface_hub - pip install .[testing] - - # Run tests - - name: Run tests - run: pytest contrib/${{ matrix.contrib }} diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml index d9594a5435..a29c577e75 100644 --- a/.github/workflows/python-quality.yml +++ b/.github/workflows/python-quality.yml @@ -35,9 +35,8 @@ jobs: - name: Install dependencies run: uv pip install "huggingface_hub[dev] @ ." - - run: .venv/bin/ruff check tests src contrib # linter - - run: .venv/bin/ruff format --check tests src contrib # formatter - - run: .venv/bin/python utils/check_contrib_list.py + - run: .venv/bin/ruff check tests src # linter + - run: .venv/bin/ruff format --check tests src # formatter - run: .venv/bin/python utils/check_inference_input_params.py - run: .venv/bin/python utils/check_static_imports.py - run: .venv/bin/python utils/check_all_variable.py @@ -50,4 +49,4 @@ jobs: - run: .venv/bin/mypy src/huggingface_hub/__init__.py --follow-imports=silent --show-traceback # Run mypy on full package - - run: .venv/bin/mypy src \ No newline at end of file + - run: .venv/bin/mypy src diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 11bfcc806f..f30945a461 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -21,27 +21,17 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.13"] - test_name: - [ - "Repository only", - "Everything else", - "Inference only", - "Xet only" - ] + python-version: ["3.9", "3.13"] + test_name: ["Everything else", "Inference only", "Xet only"] include: - - python-version: "3.13" # LFS not ran on 3.8 + - python-version: "3.13" # LFS not ran on 3.9 test_name: "lfs" - - python-version: "3.8" + - python-version: "3.9" test_name: "fastai" - python-version: "3.10" # fastai not supported on 3.12 and 3.11 -> test it on 3.10 test_name: "fastai" - - python-version: "3.8" - test_name: "tensorflow" - - python-version: "3.10" # tensorflow not supported on 3.12 -> test it on 3.10 - test_name: "tensorflow" - - python-version: "3.8" # test torch~=1.11 on python 3.8 only. - test_name: "Python 3.8, torch_1.11" + - python-version: "3.9" # test torch~=1.11 on python 3.9 only. + test_name: "Python 3.9, torch_1.11" - python-version: "3.12" # test torch latest on python 3.12 only. test_name: "torch_latest" steps: @@ -65,7 +55,7 @@ jobs: case "${{ matrix.test_name }}" in - "Repository only" | "Everything else" | "Inference only") + "Everything else" | "Inference only") sudo apt update sudo apt install -y libsndfile1-dev ;; @@ -84,17 +74,11 @@ jobs: uv pip install --upgrade torch ;; - "Python 3.8, torch_1.11") + "Python 3.9, torch_1.11") uv pip install "huggingface_hub[torch] @ ." uv pip install torch~=1.11 ;; - tensorflow) - sudo apt update - sudo apt install -y graphviz - uv pip install "huggingface_hub[tensorflow-testing] @ ." 
- ;; - esac # If not "Xet only", we want to test upload/download with regular LFS workflow @@ -112,13 +96,6 @@ jobs: case "${{ matrix.test_name }}" in - "Repository only") - # Run repo tests concurrently - PYTEST="$PYTEST ../tests -k 'TestRepository' -n 4" - echo $PYTEST - eval $PYTEST - ;; - "Inference only") # Run inference tests concurrently PYTEST="$PYTEST ../tests -k 'test_inference' -n 4" @@ -140,14 +117,7 @@ jobs: eval "$PYTEST ../tests/test_fastai*" ;; - tensorflow) - # Cannot be on same line since '_tf*' checks if tensorflow is NOT imported by default - eval "$PYTEST ../tests/test_tf*" - eval "$PYTEST ../tests/test_keras*" - eval "$PYTEST ../tests/test_serialization.py" - ;; - - "Python 3.8, torch_1.11" | torch_latest) + "Python 3.9, torch_1.11" | torch_latest) eval "$PYTEST ../tests/test_hub_mixin*" eval "$PYTEST ../tests/test_serialization.py" ;; @@ -178,7 +148,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.9", "3.11"] test_name: ["Everything else", "Xet only"] steps: diff --git a/.github/workflows/release-conda.yml b/.github/workflows/release-conda.yml index 135d988809..b6ead02950 100644 --- a/.github/workflows/release-conda.yml +++ b/.github/workflows/release-conda.yml @@ -26,7 +26,7 @@ jobs: with: auto-update-conda: true auto-activate-base: false - python-version: 3.8 + python-version: 3.9 activate-environment: "build-hub" - name: Setup conda env diff --git a/Makefile b/Makefile index faa2a63500..35128faf2b 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,13 @@ -.PHONY: contrib quality style test +.PHONY: quality style test -check_dirs := contrib src tests utils setup.py +check_dirs := src tests utils setup.py quality: ruff check $(check_dirs) # linter ruff format --check $(check_dirs) # formatter python utils/check_inference_input_params.py - python utils/check_contrib_list.py python utils/check_static_imports.py python utils/check_all_variable.py python utils/generate_async_inference_client.py @@ -18,7 +17,6 @@ quality: style: ruff format $(check_dirs) # formatter ruff check --fix $(check_dirs) # linter - python utils/check_contrib_list.py --update python utils/check_static_imports.py --update python utils/check_all_variable.py --update python utils/generate_async_inference_client.py --update @@ -38,42 +36,3 @@ repocard: test: pytest ./tests/ - -# Taken from https://stackoverflow.com/a/12110773 -# Commands: -# make contrib_setup_timm : setup tests for timm -# make contrib_test_timm : run tests for timm -# make contrib_timm : setup and run tests for timm -# make contrib_clear_timm : delete timm virtual env -# -# make contrib_setup : setup ALL tests -# make contrib_test : run ALL tests -# make contrib : setup and run ALL tests -# make contrib_clear : delete all virtual envs -# Use -j4 flag to run jobs in parallel. 
-CONTRIB_LIBS := sentence_transformers spacy timm -CONTRIB_JOBS := $(addprefix contrib_,${CONTRIB_LIBS}) -CONTRIB_CLEAR_JOBS := $(addprefix contrib_clear_,${CONTRIB_LIBS}) -CONTRIB_SETUP_JOBS := $(addprefix contrib_setup_,${CONTRIB_LIBS}) -CONTRIB_TEST_JOBS := $(addprefix contrib_test_,${CONTRIB_LIBS}) - -contrib_clear_%: - rm -rf contrib/$*/.venv - -contrib_setup_%: - python3 -m venv contrib/$*/.venv - ./contrib/$*/.venv/bin/pip install -r contrib/$*/requirements.txt - ./contrib/$*/.venv/bin/pip uninstall -y huggingface_hub - ./contrib/$*/.venv/bin/pip install -e .[testing] - -contrib_test_%: - ./contrib/$*/.venv/bin/python -m pytest contrib/$* - -contrib_%: - make contrib_setup_$* - make contrib_test_$* - -contrib: ${CONTRIB_JOBS}; -contrib_clear: ${CONTRIB_CLEAR_JOBS}; echo "Successful contrib tests." -contrib_setup: ${CONTRIB_SETUP_JOBS}; echo "Successful contrib setup." -contrib_test: ${CONTRIB_TEST_JOBS}; echo "Successful contrib tests." diff --git a/contrib/README.md b/contrib/README.md deleted file mode 100644 index 05db2d705b..0000000000 --- a/contrib/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Contrib test suite - -The contrib folder contains simple end-to-end scripts to test integration of `huggingface_hub` in downstream libraries. The main goal is to proactively notice breaking changes and deprecation warnings. - -## Add tests for a new library - -To add another contrib lib, one must: -1. Create a subfolder with the lib name. Example: `./contrib/transformers` -2. Create a `requirements.txt` file specific to this lib. Example `./contrib/transformers/requirements.txt` -3. Implements tests for this lib. Example: `./contrib/transformers/test_push_to_hub.py` -4. Run `make style`. This will edit both `makefile` and `.github/workflows/contrib-tests.yml` to add the lib to list of libs to test. Make sure changes are accurate before committing. - -## Run contrib tests on CI - -Contrib tests can be [manually triggered in GitHub](https://github.com/huggingface/huggingface_hub/actions) with the `Contrib tests` workflow. - -Tests are not run in the default test suite (for each PR) as this would slow down development process. The goal is to notice breaking changes, not to avoid them. In particular, it is interesting to trigger it before a release to make sure it will not cause too much friction. - -## Run contrib tests locally - -Tests must be ran individually for each dependent library. Here is an example to run -`timm` tests. Tests are separated to avoid conflicts between version dependencies. - -### Run all contrib tests - -Before running tests, a virtual env must be setup for each contrib library. To do so, run: - -```sh -# Run setup in parallel to save time -make contrib_setup -j4 -``` - -Then tests can be run - -```sh -# Optional: -j4 to run in parallel. Output will be messy in that case. -make contrib_test -j4 -``` - -Optionally, it is possible to setup and run all tests in a single command. However this -take more time as you don't need to setup the venv each time you run tests. - -```sh -make contrib -j4 -``` - -Finally, it is possible to delete all virtual envs to get a fresh start for contrib tests. -After running this command, `contrib_setup` will have to re-download/re-install all dependencies. 
- -``` -make contrib_clear -``` - -### Run contrib tests for a single lib - -Instead of running tests for all contrib libraries, you can run a specific lib: - -```sh -# Setup timm tests -make contrib_setup_timm - -# Run timm tests -make contrib_test_timm - -# (or) Setup and run timm tests at once -make contrib_timm - -# Delete timm virtualenv if corrupted -make contrib_clear_timm -``` diff --git a/contrib/__init__.py b/contrib/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/conftest.py b/contrib/conftest.py deleted file mode 100644 index 285139fd69..0000000000 --- a/contrib/conftest.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import time -import uuid -from typing import Generator - -import pytest - -from huggingface_hub import delete_repo - - -@pytest.fixture(scope="session") -def token() -> str: - # Not critical, only usable on the sandboxed CI instance. - return "hf_94wBhPGp6KrrTH3KDchhKpRxZwd6dmHWLL" - - -@pytest.fixture(scope="session") -def user() -> str: - return "__DUMMY_TRANSFORMERS_USER__" - - -@pytest.fixture(autouse=True, scope="session") -def login_as_dummy_user(token: str) -> Generator: - """Log in with dummy user token.""" - # Cannot use `monkeypatch` fixture since we want it to be "session-scoped" - old_token = os.environ["HF_TOKEN"] - os.environ["HF_TOKEN"] = token - yield - os.environ["HF_TOKEN"] = old_token - - -@pytest.fixture -def repo_name(request) -> None: - """ - Return a readable pseudo-unique repository name for tests. - - Example: "repo-2fe93f-16599646671840" - """ - prefix = request.module.__name__ # example: `test_timm` - id = uuid.uuid4().hex[:6] - ts = int(time.time() * 10e3) - return f"repo-{prefix}-{id}-{ts}" - - -@pytest.fixture -def cleanup_repo(user: str, repo_name: str) -> None: - """Delete the repo at the end of the tests. - - TODO: Adapt to handle `repo_type` as well - """ - yield # run test - delete_repo(repo_id=f"{user}/{repo_name}") diff --git a/contrib/sentence_transformers/__init__.py b/contrib/sentence_transformers/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/sentence_transformers/requirements.txt b/contrib/sentence_transformers/requirements.txt deleted file mode 100644 index c8c5244b95..0000000000 --- a/contrib/sentence_transformers/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/UKPLab/sentence-transformers.git#egg=sentence-transformers diff --git a/contrib/sentence_transformers/test_sentence_transformers.py b/contrib/sentence_transformers/test_sentence_transformers.py deleted file mode 100644 index d1ceeb43dc..0000000000 --- a/contrib/sentence_transformers/test_sentence_transformers.py +++ /dev/null @@ -1,37 +0,0 @@ -import time - -import pytest -from sentence_transformers import SentenceTransformer, util - -from huggingface_hub import model_info - -from ..utils import production_endpoint - - -@pytest.fixture(scope="module") -def multi_qa_model() -> SentenceTransformer: - with production_endpoint(): - return SentenceTransformer("multi-qa-MiniLM-L6-cos-v1") - - -def test_from_pretrained(multi_qa_model: SentenceTransformer) -> None: - # Example taken from https://www.sbert.net/docs/hugging_face.html#using-hugging-face-models. 
- query_embedding = multi_qa_model.encode("How big is London") - passage_embedding = multi_qa_model.encode( - [ - "London has 9,787,426 inhabitants at the 2011 census", - "London is known for its financial district", - ] - ) - print("Similarity:", util.dot_score(query_embedding, passage_embedding)) - - -def test_push_to_hub(multi_qa_model: SentenceTransformer, repo_name: str, user: str, cleanup_repo: None) -> None: - multi_qa_model.save_to_hub(repo_name, organization=user) - - # Sleep to ensure that model_info isn't called too soon - time.sleep(1) - - # Check model has been pushed properly - model_id = f"{user}/{repo_name}" - assert model_info(model_id).library_name == "sentence-transformers" diff --git a/contrib/spacy/__init__.py b/contrib/spacy/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/spacy/requirements.txt b/contrib/spacy/requirements.txt deleted file mode 100644 index 6255342454..0000000000 --- a/contrib/spacy/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -git+https://github.com/explosion/spacy-huggingface-hub.git#egg=spacy-huggingface-hub diff --git a/contrib/spacy/test_spacy.py b/contrib/spacy/test_spacy.py deleted file mode 100644 index 00d4c9b671..0000000000 --- a/contrib/spacy/test_spacy.py +++ /dev/null @@ -1,48 +0,0 @@ -import time - -from spacy_huggingface_hub import push - -from huggingface_hub import delete_repo, hf_hub_download, model_info -from huggingface_hub.errors import HfHubHTTPError - -from ..utils import production_endpoint - - -def test_push_to_hub(user: str) -> None: - """Test equivalent of `python -m spacy huggingface-hub push`. - - (0. Delete existing repo on the Hub (if any)) - 1. Download an example file from production - 2. Push the model! - 3. Check model pushed the Hub + as spacy library - (4. 
Cleanup) - """ - model_id = f"{user}/en_core_web_sm" - _delete_repo(model_id) - - # Download example file from HF Hub (see https://huggingface.co/spacy/en_core_web_sm) - with production_endpoint(): - whl_path = hf_hub_download( - repo_id="spacy/en_core_web_sm", - filename="en_core_web_sm-any-py3-none-any.whl", - ) - - # Push spacy model to Hub - push(whl_path) - - # Sleep to ensure that model_info isn't called too soon - time.sleep(1) - - # Check model has been pushed properly - model_id = f"{user}/en_core_web_sm" - assert model_info(model_id).library_name == "spacy" - - # Cleanup - _delete_repo(model_id) - - -def _delete_repo(model_id: str) -> None: - try: - delete_repo(model_id) - except HfHubHTTPError: - pass diff --git a/contrib/timm/__init__.py b/contrib/timm/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/timm/requirements.txt b/contrib/timm/requirements.txt deleted file mode 100644 index 33944e7373..0000000000 --- a/contrib/timm/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Timm -git+https://github.com/rwightman/pytorch-image-models.git#egg=timm -safetensors diff --git a/contrib/timm/test_timm.py b/contrib/timm/test_timm.py deleted file mode 100644 index f57788f8c4..0000000000 --- a/contrib/timm/test_timm.py +++ /dev/null @@ -1,20 +0,0 @@ -import timm - -from ..utils import production_endpoint - - -MODEL_ID = "timm/mobilenetv3_large_100.ra_in1k" - - -@production_endpoint() -def test_load_from_hub() -> None: - # Test load only config - _ = timm.models.load_model_config_from_hf(MODEL_ID) - - # Load entire model from Hub - _ = timm.create_model("hf_hub:" + MODEL_ID, pretrained=True) - - -def test_push_to_hub(repo_name: str, cleanup_repo: None) -> None: - model = timm.create_model("mobilenetv3_rw") - timm.models.push_to_hf_hub(model, repo_name) diff --git a/contrib/utils.py b/contrib/utils.py deleted file mode 100644 index e1681cd561..0000000000 --- a/contrib/utils.py +++ /dev/null @@ -1,56 +0,0 @@ -import contextlib -from typing import Generator -from unittest.mock import patch - - -@contextlib.contextmanager -def production_endpoint() -> Generator: - """Patch huggingface_hub to connect to production server in a context manager. - - Ugly way to patch all constants at once. - TODO: refactor when https://github.com/huggingface/huggingface_hub/issues/1172 is fixed. 
- - Example: - ```py - def test_push_to_hub(): - # Pull from production Hub - with production_endpoint(): - model = ...from_pretrained("modelname") - - # Push to staging Hub - model.push_to_hub() - ``` - """ - PROD_ENDPOINT = "https://huggingface.co" - ENDPOINT_TARGETS = [ - "huggingface_hub.constants", - "huggingface_hub._commit_api", - "huggingface_hub.hf_api", - "huggingface_hub.lfs", - "huggingface_hub.commands.user", - "huggingface_hub.utils._git_credential", - ] - - PROD_URL_TEMPLATE = PROD_ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}" - URL_TEMPLATE_TARGETS = [ - "huggingface_hub.constants", - "huggingface_hub.file_download", - ] - - from huggingface_hub.hf_api import api - - patchers = ( - [patch(target + ".ENDPOINT", PROD_ENDPOINT) for target in ENDPOINT_TARGETS] - + [patch(target + ".HUGGINGFACE_CO_URL_TEMPLATE", PROD_URL_TEMPLATE) for target in URL_TEMPLATE_TARGETS] - + [patch.object(api, "endpoint", PROD_URL_TEMPLATE)] - ) - - # Start all patches - for patcher in patchers: - patcher.start() - - yield - - # Stop all patches - for patcher in patchers: - patcher.stop() diff --git a/docs/source/cn/_toctree.yml b/docs/source/cn/_toctree.yml index b4949efa35..db6d3244a9 100644 --- a/docs/source/cn/_toctree.yml +++ b/docs/source/cn/_toctree.yml @@ -20,7 +20,4 @@ title: 概览 - local: guides/hf_file_system title: Hugging Face 文件系统 -- title: "concepts" - sections: - - local: concepts/git_vs_http - title: Git vs HTTP 范式 + diff --git a/docs/source/cn/concepts/git_vs_http.md b/docs/source/cn/concepts/git_vs_http.md deleted file mode 100644 index b582b5f991..0000000000 --- a/docs/source/cn/concepts/git_vs_http.md +++ /dev/null @@ -1,40 +0,0 @@ - - -# Git 与 HTTP 范式 - -`huggingface_hub`库是用于与Hugging Face Hub进行交互的库,Hugging Face Hub是一组基于Git的存储库(模型、数据集或Spaces)。使用 `huggingface_hub`有两种主要方式来访问Hub。 - -第一种方法,即所谓的“基于git”的方法,由[`Repository`]类驱动。这种方法使用了一个包装器,它在 `git`命令的基础上增加了专门与Hub交互的额外函数。第二种选择,称为“基于HTTP”的方法,涉及使用[`HfApi`]客户端进行HTTP请求。让我们来看一看每种方法的优缺点。 - -## 存储库:基于历史的 Git 方法 - -最初,`huggingface_hub`主要围绕 [`Repository`] 类构建。它为常见的 `git` 命令(如 `"git add"`、`"git commit"`、`"git push"`、`"git tag"`、`"git checkout"` 等)提供了 Python 包装器 - -该库还可以帮助设置凭据和跟踪大型文件,这些文件通常在机器学习存储库中使用。此外,该库允许您在后台执行其方法,使其在训练期间上传数据很有用。 - -使用 [`Repository`] 的最大优点是它允许你在本地机器上维护整个存储库的本地副本。这也可能是一个缺点,因为它需要你不断更新和维护这个本地副本。这类似于传统软件开发中,每个开发人员都维护自己的本地副本,并在开发功能时推送更改。但是,在机器学习的上下文中,这可能并不总是必要的,因为用户可能只需要下载推理所需的权重,或将权重从一种格式转换为另一种格式,而无需克隆整个存储库。 - -## HfApi: 一个功能强大且方便的HTTP客户端 - -`HfApi` 被开发为本地 git 存储库的替代方案,因为本地 git 存储库在处理大型模型或数据集时可能会很麻烦。`HfApi` 提供与基于 git 的方法相同的功能,例如下载和推送文件以及创建分支和标签,但无需本地文件夹来保持同步。 - -`HfApi`除了提供 `git` 已经提供的功能外,还提供其他功能,例如: - -* 管理存储库 -* 使用缓存下载文件以进行有效的重复使用 -* 在 Hub 中搜索存储库和元数据 -* 访问社区功能,如讨论、PR和评论 -* 配置Spaces - -## 我应该使用什么?以及何时使用? 
- -总的来说,在大多数情况下,`HTTP 方法`是使用 huggingface_hub 的推荐方法。但是,在以下几种情况下,维护本地 git 克隆(使用 `Repository`)可能更有益: - -如果您在本地机器上训练模型,使用传统的 git 工作流程并定期推送更新可能更有效。`Repository` 被优化为此类情况,因为它能够在后台运行。 -如果您需要手动编辑大型文件,`git `是最佳选择,因为它只会将文件的差异发送到服务器。使用 `HfAPI` 客户端,每次编辑都会上传整个文件。请记住,大多数大型文件是二进制文件,因此无法从 git 差异中受益。 - -并非所有 git 命令都通过 [`HfApi`] 提供。有些可能永远不会被实现,但我们一直在努力改进并缩小差距。如果您没有看到您的用例被覆盖。 - -请在[Github](https://github.com/huggingface/huggingface_hub)打开一个 issue!我们欢迎反馈,以帮助我们与我们的用户一起构建 🤗 生态系统。 diff --git a/docs/source/cn/guides/repository.md b/docs/source/cn/guides/repository.md index ac64acd90b..fd827fa71a 100644 --- a/docs/source/cn/guides/repository.md +++ b/docs/source/cn/guides/repository.md @@ -153,91 +153,3 @@ GitRefs( >>> from huggingface_hub import move_repo >>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") ``` - -## 管理存储库的本地副本 - -上述所有操作都可以通过HTTP请求完成。然而,在某些情况下,您可能希望在本地拥有存储库的副本,并使用您熟悉的Git命令与之交互。 - -[`Repository`] 类允许您使用类似于Git命令的函数与Hub上的文件和存储库进行交互。它是对Git和Git-LFS方法的包装,以使用您已经了解和喜爱的Git命令。在开始之前,请确保已安装Git-LFS(请参阅[此处](https://git-lfs.github.com/)获取安装说明)。 - -### 使用本地存储库 - -使用本地存储库路径实例化一个 [`Repository`] 对象: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### 克隆 - -`clone_from`参数将一个存储库从Hugging Face存储库ID克隆到由 `local_dir`参数指定的本地目录: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` -`clone_from`还可以使用URL克隆存储库: - -请运行以下代码: - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -你可以将`clone_from`参数与[`create_repo`]结合使用,以创建并克隆一个存储库: - -请运行以下代码: - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -当你克隆一个存储库时,通过在克隆时指定`git_user`和`git_email`参数,你还可以为克隆的存储库配置Git用户名和电子邮件。当用户提交到该存储库时,Git将知道提交的作者是谁。 - -请运行以下代码: - -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -### 分支 - -分支对于协作和实验而不影响当前文件和代码非常重要。使用[`~Repository.git_checkout`]来在不同的分支之间切换。例如,如果你想从 `branch1`切换到 `branch2`: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### 拉取 - -[`~Repository.git_pull`] 允许你使用远程存储库的更改更新当前本地分支: - -请运行以下代码: - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -如果你希望本地的提交发生在你的分支被远程的新提交更新之后,请设置`rebase=True`: - -```py ->>> repo.git_pull(rebase=True) -``` diff --git a/docs/source/cn/installation.md b/docs/source/cn/installation.md index c800b4b173..516d8b9f70 100644 --- a/docs/source/cn/installation.md +++ b/docs/source/cn/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 
 在开始之前,您需要通过安装适当的软件包来设置您的环境
 
-huggingface_hub 在 Python 3.8 或更高版本上进行了测试,可以保证在这些版本上正常运行。如果您使用的是 Python 3.7 或更低版本,可能会出现兼容性问题
+huggingface_hub 在 Python 3.9 或更高版本上进行了测试,可以保证在这些版本上正常运行。如果您使用的是 Python 3.8 或更低版本,可能会出现兼容性问题
 
 ## 使用 pip 安装
 
@@ -48,11 +48,7 @@ pip install --upgrade huggingface_hub
 您可以通过`pip`安装可选依赖项,请运行以下代码:
 
 ```bash
-# 安装 TensorFlow 特定功能的依赖项
-# /!\ 注意:这不等同于 `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
-# 安装 TensorFlow 特定功能和 CLI 特定功能的依赖项
+# 安装 Torch 特定功能和 CLI 特定功能的依赖项
 pip install 'huggingface_hub[cli,torch]'
 ```
 
@@ -60,7 +56,7 @@ pip install 'huggingface_hub[cli,torch]'
 
 - `cli`:为 `huggingface_hub` 提供更方便的命令行界面
 
-- `fastai`,` torch`, `tensorflow`: 运行框架特定功能所需的依赖项
+- `fastai`, `torch`: 运行框架特定功能所需的依赖项
 
 - `dev`:用于为库做贡献的依赖项。包括 `testing`(用于运行测试)、`typing`(用于运行类型检查器)和 `quality`(用于运行 linter)
diff --git a/docs/source/de/_toctree.yml b/docs/source/de/_toctree.yml
index 48807ba0d8..2b994c7cc6 100644
--- a/docs/source/de/_toctree.yml
+++ b/docs/source/de/_toctree.yml
@@ -34,7 +34,3 @@
   title: Integrieren einer Bibliothek
   - local: guides/webhooks_server
     title: Webhooks server
-- title: "Konzeptionelle Anleitungen"
-  sections:
-    - local: concepts/git_vs_http
-      title: Git vs. HTTP-Paradigma
diff --git a/docs/source/de/concepts/git_vs_http.md b/docs/source/de/concepts/git_vs_http.md
deleted file mode 100644
index 978123762a..0000000000
--- a/docs/source/de/concepts/git_vs_http.md
+++ /dev/null
@@ -1,69 +0,0 @@
-
-
-# Git vs. HTTP-Paradigma
-
-Die `huggingface_hub`-Bibliothek ist eine Bibliothek zur Interaktion mit dem Hugging Face
-Hub, einer Sammlung von auf Git basierenden Repositories (Modelle, Datensätze oder
-Spaces). Es gibt zwei Hauptmethoden, um auf den Hub mit `huggingface_hub` zuzugreifen.
-
-Der erste Ansatz, der sogenannte "Git-basierte" Ansatz, wird von der [`Repository`] Klasse
-geleitet. Diese Methode verwendet einen Wrapper um den `git`-Befehl mit zusätzlichen
-Funktionen, die speziell für die Interaktion mit dem Hub entwickelt wurden. Die zweite
-Option, die als "HTTP-basierter" Ansatz bezeichnet wird, umfasst das Senden von
-HTTP-Anfragen mit dem [`HfApi`] Client. Schauen wir uns die Vor- und Nachteile jeder
-Methode an.
-
-## Repository: Der historische git-basierte Ansatz
-
-Ursprünglich wurde `huggingface_hub` größtenteils um die [`Repository`] Klasse herum
-entwickelt. Sie bietet Python-Wrapper für gängige git-Befehle wie `"git add"`, `"git commit"`,
-`"git push"`, `"git tag"`, `"git checkout"` usw.
-
-Die Bibliothek hilft auch beim Festlegen von Zugangsdaten und beim Tracking von großen
-Dateien, die in Machine-Learning-Repositories häufig verwendet werden. Darüber hinaus
-ermöglicht die Bibliothek das Ausführen ihrer Methoden im Hintergrund, was nützlich ist,
-um Daten während des Trainings hochzuladen.
-
-Der Hauptvorteil bei der Verwendung einer [`Repository`] besteht darin, dass Sie eine
-lokale Kopie des gesamten Repositorys auf Ihrem Computer pflegen können. Dies kann jedoch
-auch ein Nachteil sein, da es erfordert, diese lokale Kopie ständig zu aktualisieren und
-zu pflegen. Dies ähnelt der traditionellen Softwareentwicklung, bei der jeder Entwickler
-eine eigene lokale Kopie pflegt und Änderungen überträgt, wenn an einer Funktion
-gearbeitet wird. Im Kontext des Machine Learning ist dies jedoch nicht immer erforderlich,
-da Benutzer möglicherweise nur Gewichte für die Inferenz herunterladen oder Gewichte von
-einem Format in ein anderes konvertieren müssen, ohne das gesamte Repository zu klonen.
- -## HfApi: Ein flexibler und praktischer HTTP-Client - -Die [`HfApi`] Klasse wurde entwickelt, um eine Alternative zu lokalen Git-Repositories -bereitzustellen, die besonders bei der Arbeit mit großen Modellen oder Datensätzen -umständlich zu pflegen sein können. Die [`HfApi`] Klasse bietet die gleiche Funktionalität -wie git-basierte Ansätze, wie das Herunterladen und Hochladen von Dateien sowie das -Erstellen von Branches und Tags, jedoch ohne die Notwendigkeit eines lokalen Ordners, der -synchronisiert werden muss. - -Zusätzlich zu den bereits von `git` bereitgestellten Funktionen bietet die [`HfApi`] -Klasse zusätzliche Features wie die Möglichkeit, Repositories zu verwalten, Dateien mit -Caching für effiziente Wiederverwendung herunterzuladen, im Hub nach Repositories und -Metadaten zu suchen, auf Community-Funktionen wie Diskussionen, Pull Requests und -Kommentare zuzugreifen und Spaces-Hardware und Geheimnisse zu konfigurieren. - -## Was sollte ich verwenden ? Und wann ? - -Insgesamt ist der **HTTP-basierte Ansatz in den meisten Fällen die empfohlene Methode zur Verwendung von** -`huggingface_hub`. Es gibt jedoch einige Situationen, in denen es vorteilhaft sein kann, -eine lokale Git-Kopie (mit [`Repository`]) zu pflegen: -- Wenn Sie ein Modell auf Ihrem Computer trainieren, kann es effizienter sein, einen -herkömmlichen git-basierten Workflow zu verwenden und regelmäßige Updates zu pushen. -[`Repository`] ist für diese Art von Situation mit seiner Fähigkeit zur Hintergrundarbeit optimiert. -- Wenn Sie große Dateien manuell bearbeiten müssen, ist `git` die beste Option, da es nur -die Differenz an den Server sendet. Mit dem [`HfAPI`] Client wird die gesamte Datei bei -jeder Bearbeitung hochgeladen. Beachten Sie jedoch, dass die meisten großen Dateien binär -sind und daher sowieso nicht von Git-Diffs profitieren. - -Nicht alle Git-Befehle sind über [`HfApi`] verfügbar. Einige werden vielleicht nie -implementiert, aber wir bemühen uns ständig, die Lücken zu schließen und zu verbessern. -Wenn Sie Ihren Anwendungsfall nicht abgedeckt sehen, öffnen Sie bitte [ein Issue auf -Github](https://github.com/huggingface/huggingface_hub)! Wir freuen uns über Feedback, um das 🤗-Ökosystem mit und für unsere Benutzer aufzubauen. diff --git a/docs/source/de/guides/inference.md b/docs/source/de/guides/inference.md index 0fbc1ac378..dbb0ea19da 100644 --- a/docs/source/de/guides/inference.md +++ b/docs/source/de/guides/inference.md @@ -8,7 +8,6 @@ Inferenz ist der Prozess, bei dem ein trainiertes Modell verwendet wird, um Vorh - [Inferenz API](https://huggingface.co/docs/api-inference/index): ein Service, der Ihnen ermöglicht, beschleunigte Inferenz auf der Infrastruktur von Hugging Face kostenlos auszuführen. Dieser Service ist eine schnelle Möglichkeit, um anzufangen, verschiedene Modelle zu testen und AI-Produkte zu prototypisieren. - [Inferenz Endpunkte](https://huggingface.co/inference-endpoints/index): ein Produkt zur einfachen Bereitstellung von Modellen im Produktivbetrieb. Die Inferenz wird von Hugging Face in einer dedizierten, vollständig verwalteten Infrastruktur auf einem Cloud-Anbieter Ihrer Wahl durchgeführt. -Diese Dienste können mit dem [`InferenceClient`] Objekt aufgerufen werden. 
Dieser fungiert als Ersatz für den älteren [`InferenceApi`] Client und fügt spezielle Unterstützung für Aufgaben und das Ausführen von Inferenz hinzu, sowohl auf [Inferenz API](https://huggingface.co/docs/api-inference/index) als auch auf [Inferenz Endpunkten](https://huggingface.co/docs/inference-endpoints/index). Im Abschnitt [Legacy InferenceAPI client](#legacy-inferenceapi-client) erfahren Sie, wie Sie zum neuen Client migrieren können. > [!TIP] > [`InferenceClient`] ist ein Python-Client, der HTTP-Anfragen an unsere APIs stellt. Wenn Sie die HTTP-Anfragen direkt mit Ihrem bevorzugten Tool (curl, postman,...) durchführen möchten, lesen Sie bitte die Dokumentationsseiten der [Inferenz API](https://huggingface.co/docs/api-inference/index) oder der [Inferenz Endpunkte](https://huggingface.co/docs/inference-endpoints/index). @@ -77,34 +76,34 @@ Aufrufe, die mit dem [`InferenceClient`] gemacht werden, können mit einem [User Das Ziel von [`InferenceClient`] ist es, die einfachste Schnittstelle zum Ausführen von Inferenzen auf Hugging Face-Modellen bereitzustellen. Es verfügt über eine einfache API, die die gebräuchlichsten Aufgaben unterstützt. Hier ist eine Liste der derzeit unterstützten Aufgaben: -| Domäne | Aufgabe | Unterstützt | Dokumentation | -|--------|--------------------------------|--------------|------------------------------------| -| Audio | [Audio Classification](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | -| | [Automatic Speech Recognition](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | -| | [Text-to-Speech](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | -| Computer Vision | [Image Classification](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | -| | [Image Segmentation](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | -| | [Image-to-Image](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | -| | [Image-to-Text](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | -| | [Object Detection](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | -| | [Text-to-Image](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | -| | [Zero-Shot-Image-Classification](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | -| Multimodal | [Documentation Question Answering](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | -| | [Visual Question Answering](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | -| NLP | [Conversational](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | -| | [Feature Extraction](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | -| | [Fill Mask](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | -| | [Question Answering](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | -| | [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | -| | 
[Summarization](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | -| | [Table Question Answering](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | -| | [Text Classification](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | -| | [Text Generation](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | -| | [Token Classification](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | -| | [Translation](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | -| | [Zero Shot Classification](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | -| Tabular | [Tabular Classification](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | -| | [Tabular Regression](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | +| Domäne | Aufgabe | Unterstützt | Dokumentation | +| --------------- | --------------------------------------------------------------------------------------------- | ----------- | --------------------------------------------------- | +| Audio | [Audio Classification](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | +| | [Automatic Speech Recognition](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | +| | [Text-to-Speech](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | +| Computer Vision | [Image Classification](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | +| | [Image Segmentation](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | +| | [Image-to-Image](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | +| | [Image-to-Text](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | +| | [Object Detection](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | +| | [Text-to-Image](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | +| | [Zero-Shot-Image-Classification](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | +| Multimodal | [Documentation Question Answering](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | +| | [Visual Question Answering](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | +| NLP | [Conversational](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | +| | [Feature Extraction](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | +| | [Fill Mask](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | +| | [Question Answering](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | +| | [Sentence Similarity](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | +| | 
[Summarization](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | +| | [Table Question Answering](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | +| | [Text Classification](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | +| | [Text Generation](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | +| | [Token Classification](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | +| | [Translation](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | +| | [Zero Shot Classification](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | +| Tabular | [Tabular Classification](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | +| | [Tabular Regression](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | > [!TIP] @@ -175,90 +174,3 @@ Einige Aufgaben erfordern binäre Eingaben, zum Beispiel bei der Arbeit mit Bild [{'score': 0.9779096841812134, 'label': 'Blenheim spaniel'}, ...] ``` -## Legacy InferenceAPI client - -Der [`InferenceClient`] dient als Ersatz für den veralteten [`InferenceApi`]-Client. Er bietet spezifische Unterstützung für Aufgaben und behandelt Inferenz sowohl auf der [Inferenz API](https://huggingface.co/docs/api-inference/index) als auch auf den [Inferenz Endpunkten](https://huggingface.co/docs/inference-endpoints/index). - -Hier finden Sie eine kurze Anleitung, die Ihnen hilft, von [`InferenceApi`] zu [`InferenceClient`] zu migrieren. - -### Initialisierung - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN) -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient(model="bert-base-uncased", token=API_TOKEN) -``` - -### Ausführen einer bestimmten Aufgabe - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="paraphrase-xlm-r-multilingual-v1", task="feature-extraction") ->>> inference(...) -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient() ->>> inference.feature_extraction(..., model="paraphrase-xlm-r-multilingual-v1") -``` - -> [!TIP] -> Dies ist der empfohlene Weg, um Ihren Code an [`InferenceClient`] anzupassen. Dadurch können Sie von den aufgabenspezifischen Methoden wie `feature_extraction` profitieren. 
- -### Eigene Anfragen ausführen - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased") ->>> inference(inputs="The goal of life is [MASK].") -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> client = InferenceClient() ->>> response = client.post(json={"inputs": "The goal of life is [MASK]."}, model="bert-base-uncased") ->>> response.json() -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -### Mit Parametern ausführen - -Ändern Sie von - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="typeform/distilbert-base-uncased-mnli") ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> inference(inputs, params) -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` - -zu - -```python ->>> from huggingface_hub import InferenceClient ->>> client = InferenceClient() ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> response = client.post(json={"inputs": inputs, "parameters": params}, model="typeform/distilbert-base-uncased-mnli") ->>> response.json() -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` diff --git a/docs/source/de/guides/integrations.md b/docs/source/de/guides/integrations.md index 06384c80da..34e9bae3ce 100644 --- a/docs/source/de/guides/integrations.md +++ b/docs/source/de/guides/integrations.md @@ -82,7 +82,7 @@ Obwohl dieser Ansatz flexibel ist, hat er einige Nachteile, insbesondere in Bezu - `token`: zum Herunterladen aus einem privaten Repository - `revision`: zum Herunterladen von einem spezifischen Branch - `cache_dir`: um Dateien in einem spezifischen Verzeichnis zu cachen -- `force_download`/`resume_download`/`local_files_only`: um den Cache wieder zu verwenden oder nicht +- `force_download`/`local_files_only`: um den Cache wieder zu verwenden oder nicht - `api_endpoint`/`proxies`: HTTP-Session konfigurieren Beim Pushen von Modellen werden ähnliche Parameter unterstützt: @@ -202,8 +202,7 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, + proxies: Optional[dict], local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # zusätzliches Argument @@ -221,8 +220,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -242,9 +239,9 @@ Und das war's! 
 Lassen Sie uns die beiden Ansätze, die wir gesehen haben, schnell mit ihren Vor- und Nachteilen zusammenfassen. Die untenstehende Tabelle ist nur indikativ. Ihr Framework könnte einige Besonderheiten haben, die Sie berücksichtigen müssen. Dieser Leitfaden soll nur Richtlinien und Ideen geben, wie Sie die Integration handhaben können. Kontaktieren Sie uns in jedem Fall, wenn Sie Fragen haben!
 
-| Integration | Mit Helfern | Mit [`ModelHubMixin`] |
-|:---:|:---:|:---:|
-| Benutzererfahrung | `model = load_from_hub(...)`<br>`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`<br>`model.push_to_hub(...)` |
-| Flexibilität | Sehr flexibel.<br>Sie haben die volle Kontrolle über die Implementierung. | Weniger flexibel.<br>Ihr Framework muss eine Modellklasse haben. |
-| Wartung | Mehr Wartung, um Unterstützung für Konfiguration und neue Funktionen hinzuzufügen. Könnte auch das Beheben von Benutzerproblemen erfordern. | Weniger Wartung, da die meisten Interaktionen mit dem Hub in `huggingface_hub` implementiert sind. |
-| Dokumentation/Typ-Annotation| Manuell zu schreiben. | Teilweise durch `huggingface_hub` behandelt. |
+| Integration                  | Mit Helfern                                                                                                                                 | Mit [`ModelHubMixin`]                                                                              |
+| :--------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------: |
+| Benutzererfahrung            | `model = load_from_hub(...)`<br>`push_to_hub(model, ...)`                                                                                   | `model = MyModel.from_pretrained(...)`<br>`model.push_to_hub(...)`                                 |
+| Flexibilität                 | Sehr flexibel.<br>Sie haben die volle Kontrolle über die Implementierung.                                                                   | Weniger flexibel.<br>Ihr Framework muss eine Modellklasse haben.                                   |
+| Wartung                      | Mehr Wartung, um Unterstützung für Konfiguration und neue Funktionen hinzuzufügen. Könnte auch das Beheben von Benutzerproblemen erfordern. | Weniger Wartung, da die meisten Interaktionen mit dem Hub in `huggingface_hub` implementiert sind. |
+| Dokumentation/Typ-Annotation | Manuell zu schreiben.                                                                                                                       | Teilweise durch `huggingface_hub` behandelt.                                                       |
diff --git a/docs/source/de/installation.md b/docs/source/de/installation.md
index 3ba965bd4b..a603d25558 100644
--- a/docs/source/de/installation.md
+++ b/docs/source/de/installation.md
@@ -6,7 +6,7 @@ rendered properly in your Markdown viewer.
 
 Bevor Sie beginnen, müssen Sie Ihre Umgebung vorbereiten, indem Sie die entsprechenden Pakete installieren.
 
-`huggingface_hub` wurde für **Python 3.8+** getestet.
+`huggingface_hub` wurde für **Python 3.9+** getestet.
 
 ## Installation mit pip
 
@@ -44,10 +44,6 @@ Einige Abhängigkeiten von `huggingface_hub` sind [optional](https://setuptools.
 Sie können optionale Abhängigkeiten über `pip` installieren:
 
 ```bash
-# Abhängigkeiten für spezifische TensorFlow-Funktionen installieren
-# /!\ Achtung: dies entspricht nicht `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
 # Abhängigkeiten sowohl für torch-spezifische als auch für CLI-spezifische Funktionen installieren.
 pip install 'huggingface_hub[cli,torch]'
 ```
 
 Hier ist die Liste der optionalen Abhängigkeiten in huggingface_hub:
 
 - `cli`: bietet eine komfortablere CLI-Schnittstelle für huggingface_hub.
-- `fastai`, `torch`, `tensorflow`: Abhängigkeiten, um framework-spezifische Funktionen auszuführen.
+- `fastai`, `torch`: Abhängigkeiten, um framework-spezifische Funktionen auszuführen.
 - `dev`: Abhängigkeiten, um zur Bibliothek beizutragen. Enthält `testing` (um Tests auszuführen), `typing` (um den Type Checker auszuführen) und `quality` (um Linters auszuführen).
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 4c03a41c7b..5407e0374a 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -46,6 +46,8 @@
   sections:
     - local: concepts/git_vs_http
       title: Git vs HTTP paradigm
+    - local: concepts/migration
+      title: Migrating to huggingface_hub v1.0
 - title: 'Reference'
   sections:
     - local: package_reference/overview
@@ -54,8 +56,6 @@
       title: Authentication
     - local: package_reference/environment_variables
       title: Environment variables
-    - local: package_reference/repository
-      title: Managing local and online repositories
     - local: package_reference/hf_api
       title: Hugging Face Hub API
     - local: package_reference/file_download
diff --git a/docs/source/en/concepts/git_vs_http.md b/docs/source/en/concepts/git_vs_http.md
index fe5ca4a8fc..49d0370752 100644
--- a/docs/source/en/concepts/git_vs_http.md
+++ b/docs/source/en/concepts/git_vs_http.md
@@ -4,56 +4,28 @@ rendered properly in your Markdown viewer.
 
 # Git vs HTTP paradigm
 
-The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a
-collection of git-based repositories (models, datasets or Spaces). There are two main
-ways to access the Hub using `huggingface_hub`.
-
-The first approach, the so-called "git-based" approach, is led by the [`Repository`] class.
-This method uses a wrapper around the `git` command with additional functions specifically
-designed to interact with the Hub. The second option, called the "HTTP-based" approach,
-involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons
-of each approach.
-
-## Repository: the historical git-based approach
-
-At first, `huggingface_hub` was mostly built around the [`Repository`] class. It provides
-Python wrappers for common `git` commands such as `"git add"`, `"git commit"`, `"git push"`,
-`"git tag"`, `"git checkout"`, etc.
-
-The library also helps with setting credentials and tracking large files, which are often
-used in machine learning repositories. Additionally, the library allows you to execute its
-methods in the background, making it useful for uploading data during training.
-
-The main advantage of using a [`Repository`] is that it allows you to maintain a local
-copy of the entire repository on your machine. This can also be a disadvantage as
-it requires you to constantly update and maintain this local copy. This is similar to
-traditional software development where each developer maintains their own local copy and
-pushes changes when working on a feature. However, in the context of machine learning,
-this may not always be necessary as users may only need to download weights for inference
-or convert weights from one format to another without the need to clone the entire
-repository.
-
-> [!WARNING]
-> [`Repository`] is now deprecated in favor of the http-based alternatives. Given its large adoption in legacy code, the complete removal of [`Repository`] will only happen in release `v1.0`.
+The `huggingface_hub` library is a library for interacting with the Hugging Face Hub, which is a collection of git-based repositories (models, datasets or Spaces). There are two main ways to access the Hub using `huggingface_hub`.
+
+The first approach, the so-called "git-based" approach, relies on using standard `git` commands directly in a terminal. This method allows you to clone repositories, create commits, and push changes manually. The second option, called the "HTTP-based" approach, involves making HTTP requests using the [`HfApi`] client. Let's examine the pros and cons of each approach.
+
+## Git: the historical CLI-based approach
+
+At first, most users interacted with the Hugging Face Hub using plain `git` commands such as `git clone`, `git add`, `git commit`, `git push`, `git tag`, or `git checkout`.
+
+This approach lets you work with a full local copy of the repository on your machine, just like in traditional software development. This can be an advantage when you need offline access or want to work with the full history of a repository. However, it also comes with downsides: you are responsible for keeping the repository up to date locally, handling credentials, and managing large files (via `git-lfs`), which can become cumbersome when working with large machine learning models or datasets.
+
+In many machine learning workflows, you may only need to download a few files for inference or convert weights without needing to clone the entire repository. In such cases, using `git` can be overkill and introduce unnecessary complexity.
 
 ## HfApi: a flexible and convenient HTTP client
 
-The [`HfApi`] class was developed to provide an alternative to local git repositories, which
-can be cumbersome to maintain, especially when dealing with large models or datasets. The
-[`HfApi`] class offers the same functionality as git-based approaches, such as downloading
-and pushing files and creating branches and tags, but without the need for a local folder
-that needs to be kept in sync.
+The [`HfApi`] class was developed to provide an alternative to using local git repositories, which can be cumbersome to maintain, especially when dealing with large models or datasets. It offers the same functionality as git-based workflows, such as downloading and pushing files and creating branches and tags, but without the need for a local folder that has to be kept in sync.
 
-In addition to the functionalities already provided by `git`, the [`HfApi`] class offers
-additional features, such as the ability to manage repos, download files using caching for
-efficient reuse, search the Hub for repos and metadata, access community features such as
-discussions, PRs, and comments, and configure Spaces hardware and secrets.
+In addition to the functionalities already provided by `git`, the [`HfApi`] class offers additional features, such as the ability to manage repos, download files using caching for efficient reuse, search the Hub for repos and metadata, access community features such as discussions, PRs, and comments, and configure Spaces hardware and secrets.
 
 ## What should I use ? And when ?
 
-Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub`
-in all cases. [`HfApi`] allows to pull and push changes, work with PRs, tags and branches, interact with discussions and much more. Since the `0.16` release, the http-based methods can also run in the background, which was the last major advantage of the [`Repository`] class.
+Overall, the **HTTP-based approach is the recommended way to use** `huggingface_hub` in all cases. [`HfApi`] allows you to pull and push changes, work with PRs, tags and branches, interact with discussions and much more.
 
-However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on Github](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the 🤗 ecosystem with and for our users.
+However, not all git commands are available through [`HfApi`]. Some may never be implemented, but we are always trying to improve and close the gap. If you don't see your use case covered, please open [an issue on GitHub](https://github.com/huggingface/huggingface_hub)! We welcome feedback to help build the HF ecosystem with and for our users.
 
-This preference of the http-based [`HfApi`] over the git-based [`Repository`] does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` commands locally in workflows where it makes sense.
+This preference for the HTTP-based [`HfApi`] over direct `git` commands does not mean that git versioning will disappear from the Hugging Face Hub anytime soon. It will always be possible to use `git` locally in workflows where it makes sense.
\ No newline at end of file
diff --git a/docs/source/en/concepts/migration.md b/docs/source/en/concepts/migration.md
new file mode 100644
index 0000000000..2f60edc53d
--- /dev/null
+++ b/docs/source/en/concepts/migration.md
@@ -0,0 +1,95 @@
+# Migrating to huggingface_hub v1.0
+
+The v1.0 release is a major milestone for the `huggingface_hub` library. It marks our commitment to API stability and the maturity of the library. We have made several improvements and breaking changes to make the library more robust and easier to use.
+
+This guide is intended to help you migrate your existing code to the new version. If you have any questions or feedback, please let us know by [opening an issue on GitHub](https://github.com/huggingface/huggingface_hub/issues).
If you have any questions or feedback, please let us know by [opening an issue on GitHub](https://github.com/huggingface/huggingface_hub/issues).
+
+## Python 3.9+
+
+`huggingface_hub` now requires Python 3.9 or higher. Python 3.8 is no longer supported.
+
+## HTTPX migration
+
+The `huggingface_hub` library now uses [`httpx`](https://www.python-httpx.org/) instead of `requests` for HTTP requests. This change was made to improve performance and to support synchronous and asynchronous requests in the same way. We therefore dropped both the `requests` and `aiohttp` dependencies.
+
+### Breaking changes
+
+This is a major change that affects the entire library. While we have tried to make this change as transparent as possible, you may need to update your code in some cases. Here is a list of breaking changes introduced in the process:
+
+- **Proxy configuration**: "per method" proxies are no longer supported. Proxies must be configured globally using the `HTTP_PROXY` and `HTTPS_PROXY` environment variables.
+- **Custom HTTP backend**: The `configure_http_backend` function has been removed. You should now use [`set_client_factory`] and [`set_async_client_factory`] to configure the HTTP clients.
+- **Error handling**: HTTP errors no longer inherit from `requests.HTTPError` but from `httpx.HTTPError`. We recommend catching `huggingface_hub.HfHubHTTPError`, which is a subclass of `requests.HTTPError` in v0.x and of `httpx.HTTPError` in v1.x. Catching the `huggingface_hub` error ensures your code is compatible with both the old and new versions of the library (see the sketch below).
+- **SSLError**: `httpx` does not have the concept of an `SSLError`. Such failures now surface as a generic `httpx.ConnectError`.
+- **`LocalEntryNotFoundError`**: This error no longer inherits from `HTTPError`. We now define a new `EntryNotFoundError` from which both [`LocalEntryNotFoundError`] (file not found in the local cache) and [`RemoteEntryNotFoundError`] (file not found in the repo on the Hub) inherit. Only the remote error inherits from `HTTPError`.
+- **`InferenceClient`**: The `InferenceClient` can now be used as a context manager. This is especially useful when streaming tokens from a language model to ensure that the connection is closed properly.
+- **`AsyncInferenceClient`**: The `trust_env` parameter has been removed from the `AsyncInferenceClient`'s constructor. Environment variables are trusted by default by `httpx`. If you explicitly don't want to trust the environment, you must configure it with [`set_client_factory`].
+
+For more details, you can check [PR #3328](https://github.com/huggingface/huggingface_hub/pull/3328) that introduced `httpx`.
+
+### Why `httpx`?
+
+The migration from `requests` to `httpx` brings several key improvements that enhance the library's performance, reliability, and maintainability:
+
+**Thread Safety and Connection Reuse**: `httpx` is thread-safe by design, allowing us to safely reuse the same client across multiple threads. This connection reuse reduces the overhead of establishing new connections for each HTTP request, improving performance especially when making frequent requests to the Hub.
+
+**HTTP/2 Support**: `httpx` provides native HTTP/2 support, which offers better efficiency when making multiple requests to the same server (exactly our use case). This translates to lower latency and reduced resource consumption compared to HTTP/1.1.
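+
+To make the error-handling change listed above concrete, here is a minimal sketch of version-agnostic error handling (the repo id is a placeholder, assumed not to exist):
+
+```python
+from huggingface_hub import hf_hub_download
+from huggingface_hub.errors import HfHubHTTPError
+
+try:
+    # Placeholder repo id: any Hub call that can fail with an HTTP error works here.
+    hf_hub_download(repo_id="some-user/some-missing-repo", filename="config.json")
+except HfHubHTTPError as e:
+    # HfHubHTTPError subclasses requests.HTTPError in v0.x and httpx.HTTPError
+    # in v1.x, so this except clause behaves the same on both versions.
+    print(f"Hub request failed: {e}")
+```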
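+
+Similarly, here is a minimal sketch of the connection reuse described above, assuming the [`get_session`] helper documented in the utilities reference:
+
+```python
+from concurrent.futures import ThreadPoolExecutor
+
+from huggingface_hub import get_session
+
+# get_session() returns a shared, thread-safe client, so the concurrent calls
+# below reuse pooled connections instead of opening a new one per request.
+def ping(url: str) -> int:
+    return get_session().get(url).status_code
+
+urls = ["https://huggingface.co/api/models?limit=1"] * 4
+with ThreadPoolExecutor(max_workers=4) as pool:
+    print(list(pool.map(ping, urls)))
+```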
+
+**Unified Sync/Async API**: Unlike our previous setup with separate `requests` (sync) and `aiohttp` (async) dependencies, `httpx` provides both synchronous and asynchronous clients with identical behavior. This ensures that `InferenceClient` and `AsyncInferenceClient` have consistent functionality and eliminates subtle behavioral differences that previously existed between the two implementations.
+
+**Improved SSL Error Handling**: `httpx` handles SSL errors more gracefully, making debugging connection issues easier and more reliable.
+
+**Future-Proof Architecture**: `httpx` is actively maintained and designed for modern Python applications. In contrast, `requests` is in maintenance mode and won't receive major updates like thread-safety improvements or HTTP/2 support.
+
+**Better Environment Variable Handling**: `httpx` provides more consistent handling of environment variables across both sync and async contexts, eliminating previous inconsistencies where `requests` would read local environment variables by default while `aiohttp` would not.
+
+The transition to `httpx` gives `huggingface_hub` a modern, efficient, and maintainable HTTP backend. While most users should experience seamless operation, the underlying improvements provide better performance and reliability for all Hub interactions.
+
+## `Repository` class
+
+The `Repository` class has been removed in v1.0. It was a thin wrapper around the `git` CLI for managing repositories. You can still use `git` directly in the terminal, but the recommended approach is to use the HTTP-based API in the `huggingface_hub` library for a smoother experience, especially when dealing with large files.
+
+Here is a mapping from the legacy `Repository` class to the new `HfApi` one:
+
+| `Repository` method                        | `HfApi` method                                        |
+| ------------------------------------------ | ----------------------------------------------------- |
+| `repo.clone_from`                          | `snapshot_download`                                   |
+| `repo.git_add` + `git_commit` + `git_push` | [`upload_file`], [`upload_folder`], [`create_commit`] |
+| `repo.git_tag`                             | `create_tag`                                          |
+| `repo.git_branch`                          | `create_branch`                                       |
+
+## `HfFolder` class
+
+`HfFolder` was used to manage the user access token. Use [`login`] to save a new token, [`logout`] to delete it, and [`whoami`] to check the user associated with the current token. Finally, use [`get_token`] to retrieve the user's token in a script.
+
+## `InferenceApi` class
+
+`InferenceApi` was a class to interact with the Inference API. It is now recommended to use the [`InferenceClient`] class instead.
+
+## Other deprecated features
+
+Some methods and parameters have been removed in v1.0. The ones listed below were already deprecated with a warning message in v0.x.
+
+- `constants.hf_cache_home` has been removed. Please use `HF_HOME` instead.
+- `use_auth_token` parameters have been removed from all methods. Please use `token` instead.
+- `get_token_permission` method has been removed.
+- `update_repo_visibility` method has been removed. Please use `update_repo_settings` instead.
+- `is_write_action` parameter has been removed from `build_hf_headers`, as well as `write_permission` from `login`. The concept of "write permission" has been removed and is no longer relevant now that fine-grained tokens are the recommended approach.
+- `new_session` parameter in `login` has been renamed to `skip_if_logged_in` for better clarity.
+- `resume_download`, `force_filename`, and `local_dir_use_symlinks` parameters have been removed from `hf_hub_download` and `snapshot_download`.
+- `library`, `language`, `tags`, and `task` parameters have been removed from `list_models`.
+
+## TensorFlow and Keras 2.x support
+
+All TensorFlow-related code and dependencies have been removed in v1.0. This includes the following breaking changes:
+
+- `huggingface_hub[tensorflow]` is no longer a supported extra dependency.
+- The `split_tf_state_dict_into_shards` and `get_tf_storage_size` utility functions have been removed.
+- The `tensorflow`, `fastai`, and `fastcore` versions are no longer included in the built-in headers.
+
+The Keras 2.x integration has also been removed. This includes the `KerasModelHubMixin` class and the `save_pretrained_keras`, `from_pretrained_keras`, and `push_to_hub_keras` utilities. Keras 2.x is a legacy, unmaintained library. The recommended approach is to use Keras 3.x, which is tightly integrated with the Hub (i.e. it has built-in methods to load from and push to the Hub). If you still want to work with Keras 2.x, you should downgrade `huggingface_hub` to a v0.x version.
+
+## `upload_file` and `upload_folder` return values
+
+The [`upload_file`] and [`upload_folder`] functions now return the URL of the commit created on the Hub. Previously, they returned the URL of the file or folder. This aligns them with the return values of [`create_commit`], [`delete_file`], and [`delete_folder`].
\ No newline at end of file
diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md
index 9fe5e7a34a..c15811a538 100644
--- a/docs/source/en/guides/cli.md
+++ b/docs/source/en/guides/cli.md
@@ -17,28 +17,45 @@ First of all, let's install the CLI:

> [!TIP]
> In the snippet above, we also installed the `[cli]` extra dependencies to make the user experience better, especially when using the `cache delete` command.

+Alternatively, you can install the `hf` CLI with a single command:
+
+On macOS and Linux:
+
+```bash
+>>> curl -LsSf https://hf.co/cli/install.sh | sh
+```
+
+On Windows:
+
+```powershell
+>>> powershell -ExecutionPolicy ByPass -c "irm https://hf.co/cli/install.ps1 | iex"
+```
+
Once installed, you can check that the CLI is correctly setup:

```
>>> hf --help
-usage: hf <command> [<args>]
-
-positional arguments:
-  {auth,cache,download,repo,repo-files,upload,upload-large-folder,env,version,lfs-enable-largefiles,lfs-multipart-upload}
-                        hf command helpers
-    auth                Manage authentication (login, logout, etc.).
-    cache               Manage local cache directory.
-    download            Download files from the Hub
-    repo                Manage repos on the Hub.
-    repo-files          Manage files in a repo on the Hub.
-    upload              Upload a file or a folder to the Hub. Recommended for single-commit uploads.
-    upload-large-folder
-                        Upload a large folder to the Hub. Recommended for resumable uploads.
-    env                 Print information about the environment.
-    version             Print information about the hf version.
-
-options:
-  -h, --help            show this help message and exit
+Usage: hf [OPTIONS] COMMAND [ARGS]...
+
+  Hugging Face Hub CLI
+
+Options:
+  --install-completion  Install completion for the current shell.
+  --show-completion     Show completion for the current shell, to copy it or
+                        customize the installation.
+  --help                Show this message and exit.
+
+Commands:
+  auth                 Manage authentication (login, logout, etc.).
+  cache                Manage local cache directory.
+  download             Download files from the Hub.
+  env                  Print information about the environment.
+  jobs                 Run and manage Jobs on the Hub.
+  repo                 Manage repos on the Hub.
+ repo-files Manage files in a repo on the Hub. + upload Upload a file or a folder to the Hub. + upload-large-folder Upload a large folder to the Hub. + version Print information about the hf version. ``` If the CLI is correctly installed, you should see a list of all the options available in the CLI. If you get an error message such as `command not found: hf`, please refer to the [Installation](../installation) guide. @@ -244,6 +261,46 @@ A `.cache/huggingface/` folder is created at the root of your local directory co fuyu/model-00001-of-00002.safetensors ``` +### Dry-run mode + +In some cases, you would like to check which files would be downloaded before actually downloading them. You can check this using the `--dry-run` parameter. It lists all files to download on the repo and checks whether they are already downloaded or not. This gives an idea of how many files have to be downloaded and their sizes. + +```sh +>>> hf download openai-community/gpt2 --dry-run +[dry-run] Fetching 26 files: 100%|█████████████| 26/26 [00:04<00:00, 6.26it/s] +[dry-run] Will download 11 files (out of 26) totalling 5.6G. +File Bytes to download +--------------------------------- ----------------- +.gitattributes - +64-8bits.tflite 125.2M +64-fp16.tflite 248.3M +64.tflite 495.8M +README.md - +config.json - +flax_model.msgpack 497.8M +generation_config.json - +merges.txt - +model.safetensors 548.1M +onnx/config.json - +onnx/decoder_model.onnx 653.7M +onnx/decoder_model_merged.onnx 655.2M +onnx/decoder_with_past_model.onnx 653.7M +onnx/generation_config.json - +onnx/merges.txt - +onnx/special_tokens_map.json - +onnx/tokenizer.json - +onnx/tokenizer_config.json - +onnx/vocab.json - +pytorch_model.bin 548.1M +rust_model.ot 702.5M +tf_model.h5 497.9M +tokenizer.json - +tokenizer_config.json - +vocab.json - +``` + +For more details, check out the [download guide](./download.md#dry-run-mode). + ### Specify cache directory If not using `--local-dir`, all files will be downloaded by default to the cache directory defined by the `HF_HOME` [environment variable](../package_reference/environment_variables#hfhome). You can specify a custom cache using `--cache-dir`: @@ -276,7 +333,7 @@ By default, the `hf download` command will be verbose. It will print details suc On machines with slow connections, you might encounter timeout issues like this one: ```bash -`requests.exceptions.ReadTimeout: (ReadTimeoutError("HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: a33d910c-84c6-4514-8362-c705e2039d38)')` +`httpx.TimeoutException: (TimeoutException("HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: a33d910c-84c6-4514-8362-c705e2039d38)')` ``` To mitigate this issue, you can set the `HF_HUB_DOWNLOAD_TIMEOUT` environment variable to a higher value (default is 10): @@ -430,6 +487,69 @@ By default, the `hf upload` command will be verbose. It will print details such https://huggingface.co/Wauplin/my-cool-model/tree/main ``` +## hf repo + +`hf repo` lets you create, delete, move repositories and update their settings on the Hugging Face Hub. It also includes subcommands to manage branches and tags. + +### Create a repo + +```bash +>>> hf repo create Wauplin/my-cool-model +Successfully created Wauplin/my-cool-model on the Hub. 
+Your repo is now available at https://huggingface.co/Wauplin/my-cool-model +``` + +Create a private dataset or a Space: + +```bash +>>> hf repo create my-cool-dataset --repo-type dataset --private +>>> hf repo create my-gradio-space --repo-type space --space-sdk gradio +``` + +Use `--exist-ok` if the repo may already exist, and `--resource-group-id` to target an Enterprise resource group. + +### Delete a repo + +```bash +>>> hf repo delete Wauplin/my-cool-model +``` + +Datasets and Spaces: + +```bash +>>> hf repo delete my-cool-dataset --repo-type dataset +>>> hf repo delete my-gradio-space --repo-type space +``` + +### Move a repo + +```bash +>>> hf repo move old-namespace/my-model new-namespace/my-model +``` + +### Update repo settings + +```bash +>>> hf repo settings Wauplin/my-cool-model --gated auto +>>> hf repo settings Wauplin/my-cool-model --private true +>>> hf repo settings Wauplin/my-cool-model --private false +``` + +- `--gated`: one of `auto`, `manual`, `false` +- `--private true|false`: set repository privacy + +### Manage branches + +```bash +>>> hf repo branch create Wauplin/my-cool-model dev +>>> hf repo branch create Wauplin/my-cool-model release-1 --revision refs/pr/104 +>>> hf repo branch delete Wauplin/my-cool-model dev +``` + +> [!TIP] +> All commands accept `--repo-type` (one of `model`, `dataset`, `space`) and `--token` if you need to authenticate explicitly. Use `--help` on any command to see all options. + + ## hf repo-files If you want to delete files from a Hugging Face repository, use the `hf repo-files` command. @@ -571,7 +691,6 @@ Copy-and-paste the text below in your GitHub issue. - Who am I ?: Wauplin - Configured git credential helpers: store - FastAI: N/A -- Tensorflow: 2.11.0 - Torch: 1.12.1 - Jinja2: 3.1.2 - Graphviz: 0.20.1 @@ -797,3 +916,34 @@ Manage scheduled jobs using # Delete a scheduled job >>> hf jobs scheduled delete ``` + +## hf endpoints + +Use `hf endpoints` to list, deploy, describe, and manage Inference Endpoints directly from the terminal. The legacy +`hf inference-endpoints` alias remains available for compatibility. + +```bash +# Lists endpoints in your namespace +>>> hf endpoints ls + +# Deploy an endpoint from Model Catalog +>>> hf endpoints catalog deploy --repo openai/gpt-oss-120b --name my-endpoint + +# Deploy an endpoint from the Hugging Face Hub +>>> hf endpoints deploy my-endpoint --repo gpt2 --framework pytorch --accelerator cpu --instance-size x2 --instance-type intel-icl + +# List catalog entries +>>> hf endpoints catalog ls + +# Show status and metadata +>>> hf endpoints describe my-endpoint + +# Pause the endpoint +>>> hf endpoints pause my-endpoint + +# Delete without confirmation prompt +>>> hf endpoints delete my-endpoint --yes +``` + +> [!TIP] +> Add `--namespace` to target an organization, `--token` to override authentication. diff --git a/docs/source/en/guides/download.md b/docs/source/en/guides/download.md index e01fa0ac74..2c5e64157c 100644 --- a/docs/source/en/guides/download.md +++ b/docs/source/en/guides/download.md @@ -158,6 +158,89 @@ Fetching 2 files: 100%|███████████████████ For more details about the CLI download command, please refer to the [CLI guide](./cli#hf-download). +## Dry-run mode + +In some cases, you would like to check which files would be downloaded before actually downloading them. You can check this using the `--dry-run` parameter. It lists all files to download on the repo and checks whether they are already downloaded or not. 
This gives an idea of how many files have to be downloaded and their sizes.
+
+Here is an example, checking a single file:
+
+```sh
+>>> hf download openai-community/gpt2 onnx/decoder_model_merged.onnx --dry-run
+[dry-run] Will download 1 files (out of 1) totalling 655.2M
+File                           Bytes to download
+------------------------------ -----------------
+onnx/decoder_model_merged.onnx 655.2M
+```
+
+And if the file is already cached:
+
+```sh
+>>> hf download openai-community/gpt2 onnx/decoder_model_merged.onnx --dry-run
+[dry-run] Will download 0 files (out of 1) totalling 0.0.
+File                           Bytes to download
+------------------------------ -----------------
+onnx/decoder_model_merged.onnx -
+```
+
+You can also execute a dry-run on an entire repository:
+
+```sh
+>>> hf download openai-community/gpt2 --dry-run
+[dry-run] Fetching 26 files: 100%|█████████████| 26/26 [00:04<00:00, 6.26it/s]
+[dry-run] Will download 11 files (out of 26) totalling 5.6G.
+File                              Bytes to download
+--------------------------------- -----------------
+.gitattributes                    -
+64-8bits.tflite                   125.2M
+64-fp16.tflite                    248.3M
+64.tflite                         495.8M
+README.md                         -
+config.json                       -
+flax_model.msgpack                497.8M
+generation_config.json            -
+merges.txt                        -
+model.safetensors                 548.1M
+onnx/config.json                  -
+onnx/decoder_model.onnx           653.7M
+onnx/decoder_model_merged.onnx    655.2M
+onnx/decoder_with_past_model.onnx 653.7M
+onnx/generation_config.json       -
+onnx/merges.txt                   -
+onnx/special_tokens_map.json      -
+onnx/tokenizer.json               -
+onnx/tokenizer_config.json        -
+onnx/vocab.json                   -
+pytorch_model.bin                 548.1M
+rust_model.ot                     702.5M
+tf_model.h5                       497.9M
+tokenizer.json                    -
+tokenizer_config.json             -
+vocab.json                        -
+```
+
+And with file filtering:
+
+```sh
+>>> hf download openai-community/gpt2 --include "*.json" --dry-run
+[dry-run] Fetching 11 files: 100%|█████████████| 11/11 [00:00<00:00, 80518.92it/s]
+[dry-run] Will download 0 files (out of 11) totalling 0.0.
+File                         Bytes to download
+---------------------------- -----------------
+config.json                  -
+generation_config.json       -
+onnx/config.json             -
+onnx/generation_config.json  -
+onnx/special_tokens_map.json -
+onnx/tokenizer.json          -
+onnx/tokenizer_config.json   -
+onnx/vocab.json              -
+tokenizer.json               -
+tokenizer_config.json        -
+vocab.json                   -
+```
+
+Finally, you can also run a dry-run programmatically by passing `dry_run=True` to [`hf_hub_download`] and [`snapshot_download`]. It returns a [`DryRunFileInfo`] (respectively a list of [`DryRunFileInfo`] objects) describing, for each file, its commit hash, file name, and file size, as well as whether the file is cached and whether it would be downloaded. In practice, a file will be downloaded if it is not cached or if `force_download=True` is passed.
+
## Faster downloads

There are two options to speed up downloads. Both involve installing a Python package written in Rust.

diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
index 6fa08f2736..8dab419f57 100644
--- a/docs/source/en/guides/inference.md
+++ b/docs/source/en/guides/inference.md
@@ -11,10 +11,6 @@ The `huggingface_hub` library provides a unified interface to run inference acro
2. [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index): a product to easily deploy models to production. Inference is run by Hugging Face in a dedicated, fully managed infrastructure on a cloud provider of your choice.
3.
Local endpoints: you can also run inference with local inference servers like [llama.cpp](https://github.com/ggerganov/llama.cpp), [Ollama](https://ollama.com/), [vLLM](https://github.com/vllm-project/vllm), [LiteLLM](https://docs.litellm.ai/docs/simple_proxy), or [Text Generation Inference (TGI)](https://github.com/huggingface/text-generation-inference) by connecting the client to these local endpoints. -These services can all be called from the [`InferenceClient`] object. It acts as a replacement for the legacy -[`InferenceApi`] client, adding specific support for tasks and third-party providers. -Learn how to migrate to the new client in the [Legacy InferenceAPI client](#legacy-inferenceapi-client) section. - > [!TIP] > [`InferenceClient`] is a Python client making HTTP calls to our APIs. If you want to make the HTTP calls directly using > your preferred tool (curl, postman,...), please refer to the [Inference Providers](https://huggingface.co/docs/inference-providers/index) documentation diff --git a/docs/source/en/guides/inference_endpoints.md b/docs/source/en/guides/inference_endpoints.md index c89c47621a..1a1d64b8a9 100644 --- a/docs/source/en/guides/inference_endpoints.md +++ b/docs/source/en/guides/inference_endpoints.md @@ -33,6 +33,16 @@ The first step is to create an Inference Endpoint using [`create_inference_endpo ... ) ``` +Or via CLI: + +```bash +hf endpoints deploy my-endpoint-name --repo gpt2 --framework pytorch --accelerator cpu --vendor aws --region us-east-1 --instance-size x2 --instance-type intel-icl --task text-generation + +# Deploy from the catalog with a single command +hf endpoints catalog deploy my-endpoint-name --repo openai/gpt-oss-120b +``` + + In this example, we created a `protected` Inference Endpoint named `"my-endpoint-name"`, to serve [gpt2](https://huggingface.co/gpt2) for `text-generation`. A `protected` Inference Endpoint means your token is required to access the API. We also need to provide additional information to configure the hardware requirements, such as vendor, region, accelerator, instance type, and size. You can check out the list of available resources [here](https://api.endpoints.huggingface.cloud/#/v2%3A%3Aprovider/list_vendors). Alternatively, you can create an Inference Endpoint manually using the [Web interface](https://ui.endpoints.huggingface.co/new) for convenience. Refer to this [guide](https://huggingface.co/docs/inference-endpoints/guides/advanced) for details on advanced settings and their usage. The value returned by [`create_inference_endpoint`] is an [`InferenceEndpoint`] object: @@ -42,6 +52,12 @@ The value returned by [`create_inference_endpoint`] is an [`InferenceEndpoint`] InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +``` + It's a dataclass that holds information about the endpoint. You can access important attributes such as `name`, `repository`, `status`, `task`, `created_at`, `updated_at`, etc. If you need it, you can also access the raw response from the server with `endpoint.raw`. Once your Inference Endpoint is created, you can find it on your [personal dashboard](https://ui.endpoints.huggingface.co/). @@ -101,6 +117,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 [InferenceEndpoint(name='aws-starchat-beta', namespace='huggingface', repository='HuggingFaceH4/starchat-beta', status='paused', url=None), ...] 
``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +hf endpoints ls --namespace huggingface +hf endpoints ls --namespace '*' +``` + ## Check deployment status In the rest of this guide, we will assume that we have a [`InferenceEndpoint`] object called `endpoint`. You might have noticed that the endpoint has a `status` attribute of type [`InferenceEndpointStatus`]. When the Inference Endpoint is deployed and accessible, the status should be `"running"` and the `url` attribute is set: @@ -117,6 +141,12 @@ Before reaching a `"running"` state, the Inference Endpoint typically goes throu InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints describe my-endpoint-name +``` + Instead of fetching the Inference Endpoint status while waiting for it to run, you can directly call [`~InferenceEndpoint.wait`]. This helper takes as input a `timeout` and a `fetch_every` parameter (in seconds) and will block the thread until the Inference Endpoint is deployed. Default values are respectively `None` (no timeout) and `5` seconds. ```py @@ -189,6 +219,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 # Endpoint is not 'running' but still has a URL and will restart on first call. ``` +Or via CLI: + +```bash +hf endpoints pause my-endpoint-name +hf endpoints resume my-endpoint-name +hf endpoints scale-to-zero my-endpoint-name +``` + ### Update model or hardware requirements In some cases, you might also want to update your Inference Endpoint without creating a new one. You can either update the hosted model or the hardware requirements to run the model. You can do this using [`~InferenceEndpoint.update`]: @@ -207,6 +245,14 @@ InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2 InferenceEndpoint(name='my-endpoint-name', namespace='Wauplin', repository='gpt2-large', status='pending', url=None) ``` +Or via CLI: + +```bash +hf endpoints update my-endpoint-name --repo gpt2-large +hf endpoints update my-endpoint-name --min-replica 2 --max-replica 6 +hf endpoints update my-endpoint-name --accelerator cpu --instance-size x4 --instance-type intel-icl +``` + ### Delete the endpoint Finally if you won't use the Inference Endpoint anymore, you can simply call [`~InferenceEndpoint.delete()`]. diff --git a/docs/source/en/guides/integrations.md b/docs/source/en/guides/integrations.md index e5ac9aaa87..61dace2df4 100644 --- a/docs/source/en/guides/integrations.md +++ b/docs/source/en/guides/integrations.md @@ -244,8 +244,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # additional argument @@ -265,8 +263,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -428,11 +424,11 @@ Your framework might have some specificities that you need to address. This guid ideas on how to handle integration. In any case, feel free to contact us if you have any questions! -| Integration | Using helpers | Using [`ModelHubMixin`] | -|:---:|:---:|:---:| -| User experience | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| Flexibility | Very flexible.
You fully control the implementation. | Less flexible.
Your framework must have a model class. | -| Maintenance | More maintenance to add support for configuration, and new features. Might also require fixing issues reported by users. | Less maintenance as most of the interactions with the Hub are implemented in `huggingface_hub`. | -| Documentation / Type annotation | To be written manually. | Partially handled by `huggingface_hub`. | -| Download counter | To be handled manually. | Enabled by default if class has a `config` attribute. | -| Model card | To be handled manually | Generated by default with library_name, tags, etc. | +| Integration | Using helpers | Using [`ModelHubMixin`] | +| :-----------------------------: | :----------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------: | +| User experience | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| Flexibility | Very flexible.
You fully control the implementation. | Less flexible.
Your framework must have a model class. | +| Maintenance | More maintenance to add support for configuration, and new features. Might also require fixing issues reported by users. | Less maintenance as most of the interactions with the Hub are implemented in `huggingface_hub`. | +| Documentation / Type annotation | To be written manually. | Partially handled by `huggingface_hub`. | +| Download counter | To be handled manually. | Enabled by default if class has a `config` attribute. | +| Model card | To be handled manually | Generated by default with library_name, tags, etc. | diff --git a/docs/source/en/guides/repository.md b/docs/source/en/guides/repository.md index 8937b14e5f..2c47100016 100644 --- a/docs/source/en/guides/repository.md +++ b/docs/source/en/guides/repository.md @@ -41,6 +41,14 @@ Create an empty repository with [`create_repo`] and give it a name with the `rep 'https://huggingface.co/lysandre/test-model' ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-model +Successfully created lysandre/test-model on the Hub. +Your repo is now available at https://huggingface.co/lysandre/test-model +``` + By default, [`create_repo`] creates a model repository. But you can use the `repo_type` parameter to specify another repository type. For example, if you want to create a dataset repository: ```py @@ -49,6 +57,12 @@ By default, [`create_repo`] creates a model repository. But you can use the `rep 'https://huggingface.co/datasets/lysandre/test-dataset' ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-dataset --repo-type dataset +``` + When you create a repository, you can set your repository visibility with the `private` parameter. ```py @@ -56,6 +70,12 @@ When you create a repository, you can set your repository visibility with the `p >>> create_repo("lysandre/test-private", private=True) ``` +Or via CLI: + +```bash +>>> hf repo create lysandre/test-private --private +``` + If you want to change the repository visibility at a later time, you can use the [`update_repo_settings`] function. > [!TIP] @@ -71,6 +91,12 @@ Specify the `repo_id` of the repository you want to delete: >>> delete_repo(repo_id="lysandre/my-corrupted-dataset", repo_type="dataset") ``` +Or via CLI: + +```bash +>>> hf repo delete lysandre/my-corrupted-dataset --repo-type dataset +``` + ### Duplicate a repository (only for Spaces) In some cases, you want to copy someone else's repo to adapt it to your use case. @@ -111,7 +137,15 @@ You can create new branch and tags using [`create_branch`] and [`create_tag`]: >>> create_tag("bigcode/the-stack", repo_type="dataset", revision="v0.1-release", tag="v0.1.1", tag_message="Bump release version.") ``` -You can use the [`delete_branch`] and [`delete_tag`] functions in the same way to delete a branch or a tag. +Or via CLI: + +```bash +>>> hf repo branch create Matthijs/speecht5-tts-demo handle-dog-speaker --repo-type space +>>> hf repo tag create bigcode/the-stack v0.1.1 --repo-type dataset --revision v0.1-release -m "Bump release version." +``` + +You can use the [`delete_branch`] and [`delete_tag`] functions in the same way to delete a branch or a tag, or `hf repo branch delete` and `hf repo tag delete` respectively in CLI. + ### List all branches and tags @@ -149,6 +183,12 @@ A repository can be public or private. 
A private repository is only visible to y >>> update_repo_settings(repo_id=repo_id, private=True) ``` +Or via CLI: + +```bash +>>> hf repo settings lysandre/test-private --private true +``` + ### Setup gated access To give more control over how repos are used, the Hub allows repo authors to enable **access requests** for their repos. User must agree to share their contact information (username and email address) with the repo authors to access the files when enabled. A repo with access requests enabled is called a **gated repo**. @@ -162,6 +202,12 @@ You can set a repo as gated using [`update_repo_settings`]: >>> api.update_repo_settings(repo_id=repo_id, gated="auto") # Set automatic gating for a model ``` +Or via CLI: + +```bash +>>> hf repo settings lysandre/test-private --gated auto +``` + ### Rename your repository You can rename your repository on the Hub using [`move_repo`]. Using this method, you can also move the repo from a user to @@ -173,81 +219,8 @@ that you should be aware of. For example, you can't transfer your repo to anothe >>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") ``` -## Manage a local copy of your repository - -All the actions described above can be done using HTTP requests. However, in some cases you might be interested in having -a local copy of your repository and interact with it using the Git commands you are familiar with. - -The [`Repository`] class allows you to interact with files and repositories on the Hub with functions similar to Git commands. It is a wrapper over Git and Git-LFS methods to use the Git commands you already know and love. Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions). - -> [!WARNING] -> [`Repository`] is deprecated in favor of the http-based alternatives implemented in [`HfApi`]. Given its large adoption in legacy code, the complete removal of [`Repository`] will only happen in release `v1.0`. For more details, please read [this explanation page](./concepts/git_vs_http). - -### Use a local repository - -Instantiate a [`Repository`] object with a path to a local repository: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### Clone - -The `clone_from` parameter clones a repository from a Hugging Face repository ID to a local directory specified by the `local_dir` argument: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from` can also clone a repository using a URL: - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -You can combine the `clone_from` parameter with [`create_repo`] to create and clone a repository: - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -You can also configure a Git username and email to a cloned repository by specifying the `git_user` and `git_email` parameters when you clone a repository. When users commit to that repository, Git will be aware of the commit author. +Or via CLI: -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... 
) -``` - -### Branch - -Branches are important for collaboration and experimentation without impacting your current files and code. Switch between branches with [`~Repository.git_checkout`]. For example, if you want to switch from `branch1` to `branch2`: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### Pull - -[`~Repository.git_pull`] allows you to update a current local branch with changes from a remote repository: - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -Set `rebase=True` if you want your local commits to occur after your branch is updated with the new commits from the remote: - -```py ->>> repo.git_pull(rebase=True) +```bash +>>> hf repo move Wauplin/cool-model huggingface/cool-model ``` diff --git a/docs/source/en/guides/upload.md b/docs/source/en/guides/upload.md index 24c93fcf6e..6936fbf9b2 100644 --- a/docs/source/en/guides/upload.md +++ b/docs/source/en/guides/upload.md @@ -4,12 +4,7 @@ rendered properly in your Markdown viewer. # Upload files to the Hub -Sharing your files and work is an important aspect of the Hub. The `huggingface_hub` offers several options for uploading your files to the Hub. You can use these functions independently or integrate them into your library, making it more convenient for your users to interact with the Hub. This guide will show you how to push files: - -- without using Git. -- that are very large with [Git LFS](https://git-lfs.github.com/). -- with the `commit` context manager. -- with the [`~Repository.push_to_hub`] function. +Sharing your files and work is an important aspect of the Hub. The `huggingface_hub` offers several options for uploading your files to the Hub. You can use these functions independently or integrate them into your library, making it more convenient for your users to interact with the Hub. Whenever you want to upload files to the Hub, you need to log in to your Hugging Face account. For more details about authentication, check out [this section](../quick-start#authentication). @@ -465,111 +460,3 @@ update of the object is that **the binary content is removed** from it, meaning you don't store another reference to it. This is expected as we don't want to keep in memory the content that is already uploaded. Finally we create the commit by passing all the operations to [`create_commit`]. You can pass additional operations (add, delete or copy) that have not been processed yet and they will be handled correctly. - -## (legacy) Upload files with Git LFS - -All the methods described above use the Hub's API to upload files. This is the recommended way to upload files to the Hub. -However, we also provide [`Repository`], a wrapper around the git tool to manage a local repository. - -> [!WARNING] -> Although [`Repository`] is not formally deprecated, we recommend using the HTTP-based methods described above instead. -> For more details about this recommendation, please have a look at [this guide](../concepts/git_vs_http) explaining the -> core differences between HTTP-based and Git-based approaches. - -Git LFS automatically handles files larger than 10MB. But for very large files (>5GB), you need to install a custom transfer agent for Git LFS: - -```bash -hf lfs-enable-largefiles -``` - -You should install this for each repository that has a very large file. Once installed, you'll be able to push files larger than 5GB. 
- -### commit context manager - -The `commit` context manager handles four of the most common Git commands: pull, add, commit, and push. `git-lfs` automatically tracks any file larger than 10MB. In the following example, the `commit` context manager: - -1. Pulls from the `text-files` repository. -2. Adds a change made to `file.txt`. -3. Commits the change. -4. Pushes the change to the `text-files` repository. - -```python ->>> from huggingface_hub import Repository ->>> with Repository(local_dir="text-files", clone_from="/text-files").commit(commit_message="My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -Here is another example of how to use the `commit` context manager to save and upload a file to a repository: - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model", token=True).commit(commit_message="My cool model :)"): -... torch.save(model.state_dict(), "model.pt") -``` - -Set `blocking=False` if you would like to push your commits asynchronously. Non-blocking behavior is helpful when you want to continue running your script while your commits are being pushed. - -```python ->>> with repo.commit(commit_message="My cool model :)", blocking=False) -``` - -You can check the status of your push with the `command_queue` method: - -```python ->>> last_command = repo.command_queue[-1] ->>> last_command.status -``` - -Refer to the table below for the possible statuses: - -| Status | Description | -| -------- | ------------------------------------ | -| -1 | The push is ongoing. | -| 0 | The push has completed successfully. | -| Non-zero | An error has occurred. | - -When `blocking=False`, commands are tracked, and your script will only exit when all pushes are completed, even if other errors occur in your script. Some additional useful commands for checking the status of a push include: - -```python -# Inspect an error. ->>> last_command.stderr - -# Check whether a push is completed or ongoing. ->>> last_command.is_done - -# Check whether a push command has errored. ->>> last_command.failed -``` - -### push_to_hub - -The [`Repository`] class has a [`~Repository.push_to_hub`] function to add files, make a commit, and push them to a repository. Unlike the `commit` context manager, you'll need to pull from a repository first before calling [`~Repository.push_to_hub`]. - -For example, if you've already cloned a repository from the Hub, then you can initialize the `repo` from the local directory: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="path/to/local/repo") -``` - -Update your local clone with [`~Repository.git_pull`] and then push your file to the Hub: - -```py ->>> repo.git_pull() ->>> repo.push_to_hub(commit_message="Commit my-awesome-file to the Hub") -``` - -However, if you aren't ready to push a file yet, you can use [`~Repository.git_add`] and [`~Repository.git_commit`] to only add and commit your file: - -```py ->>> repo.git_add("path/to/file") ->>> repo.git_commit(commit_message="add my first model config file :)") -``` - -When you're ready, push the file to your repository with [`~Repository.git_push`]: - -```py ->>> repo.git_push() -``` diff --git a/docs/source/en/installation.md b/docs/source/en/installation.md index 9af8a32676..e2c19bb69c 100644 --- a/docs/source/en/installation.md +++ b/docs/source/en/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 
Before you start, you will need to setup your environment by installing the appropriate packages.

-`huggingface_hub` is tested on **Python 3.8+**.
+`huggingface_hub` is tested on **Python 3.9+**.

## Install with pip

@@ -46,17 +46,13 @@ Some dependencies of `huggingface_hub` are [optional](https://setuptools.pypa.io
You can install optional dependencies via `pip`:

```bash
-# Install dependencies for tensorflow-specific features
-# /!\ Warning: this is not equivalent to `pip install tensorflow`
-pip install 'huggingface_hub[tensorflow]'
-
 # Install dependencies for both torch-specific and CLI-specific features.
 pip install 'huggingface_hub[cli,torch]'
```

Here is the list of optional dependencies in `huggingface_hub`:
- `cli`: provide a more convenient CLI interface for `huggingface_hub`.
-- `fastai`, `torch`, `tensorflow`: dependencies to run framework-specific features.
+- `fastai`, `torch`: dependencies to run framework-specific features.
- `dev`: dependencies to contribute to the lib. Includes `testing` (to run tests), `typing` (to run type checker) and `quality` (to run linters).

@@ -107,6 +103,22 @@ Python will now look inside the folder you cloned to in addition to the normal l
For example, if your Python packages are typically installed in `./.venv/lib/python3.13/site-packages/`, Python will also search the folder you cloned `./huggingface_hub/`.

+## Install the Hugging Face CLI
+
+Use our one-liner installers to set up the `hf` CLI without touching your Python environment:
+
+On macOS and Linux:
+
+```bash
+curl -LsSf https://hf.co/cli/install.sh | sh
+```
+
+On Windows:
+
+```powershell
+powershell -ExecutionPolicy ByPass -c "irm https://hf.co/cli/install.ps1 | iex"
+```
+
## Install with conda

If you are more familiar with it, you can install `huggingface_hub` using the [conda-forge channel](https://anaconda.org/conda-forge/huggingface_hub):

diff --git a/docs/source/en/package_reference/dataclasses.md b/docs/source/en/package_reference/dataclasses.md
index 3691a2b40e..6575881db9 100644
--- a/docs/source/en/package_reference/dataclasses.md
+++ b/docs/source/en/package_reference/dataclasses.md
@@ -188,6 +188,14 @@ The `@strict` decorator enhances a dataclass with strict validation.

[[autodoc]] dataclasses.strict

+### `validate_typed_dict`
+
+Method to validate that a dictionary conforms to the types defined in a `TypedDict` class.
+
+This is the equivalent of dataclass validation, but for `TypedDict`s. Since typed dicts are never instantiated (they are only used by static type checkers), the validation step must be called manually.
+
+[[autodoc]] dataclasses.validate_typed_dict
+
### `as_validated_field`

Decorator to create a [`validated_field`]. Recommended for fields with a single validator to avoid boilerplate code.

diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md
index d0b32fb7e8..249a106454 100644
--- a/docs/source/en/package_reference/environment_variables.md
+++ b/docs/source/en/package_reference/environment_variables.md
@@ -179,7 +179,7 @@ Set to disable using `hf-xet`, even if it is available in your Python environmen

Set to `True` for faster uploads and downloads from the Hub using `hf_transfer`.

-By default, `huggingface_hub` uses the Python-based `requests.get` and `requests.post` functions.
+By default, `huggingface_hub` uses the Python-based `httpx.get` and `httpx.post` functions.
Although these are reliable and versatile, they may not be the most efficient choice for machines with high bandwidth.
[`hf_transfer`](https://github.com/huggingface/hf_transfer) is a Rust-based package developed to diff --git a/docs/source/en/package_reference/hf_api.md b/docs/source/en/package_reference/hf_api.md index 99ce0c2905..07b039e02e 100644 --- a/docs/source/en/package_reference/hf_api.md +++ b/docs/source/en/package_reference/hf_api.md @@ -45,6 +45,10 @@ models = hf_api.list_models() [[autodoc]] huggingface_hub.hf_api.DatasetInfo +### DryRunFileInfo + +[[autodoc]] huggingface_hub.hf_api.DryRunFileInfo + ### GitRefInfo [[autodoc]] huggingface_hub.hf_api.GitRefInfo diff --git a/docs/source/en/package_reference/inference_client.md b/docs/source/en/package_reference/inference_client.md index eae0edc755..1a92641077 100644 --- a/docs/source/en/package_reference/inference_client.md +++ b/docs/source/en/package_reference/inference_client.md @@ -34,16 +34,3 @@ pip install --upgrade huggingface_hub[inference] ## InferenceTimeoutError [[autodoc]] InferenceTimeoutError - -## InferenceAPI - -[`InferenceAPI`] is the legacy way to call the Inference API. The interface is more simplistic and requires knowing -the input parameters and output format for each task. It also lacks the ability to connect to other services like -Inference Endpoints or AWS SageMaker. [`InferenceAPI`] will soon be deprecated so we recommend using [`InferenceClient`] -whenever possible. Check out [this guide](../guides/inference#legacy-inferenceapi-client) to learn how to switch from -[`InferenceAPI`] to [`InferenceClient`] in your scripts. - -[[autodoc]] InferenceApi - - __init__ - - __call__ - - all diff --git a/docs/source/en/package_reference/mixins.md b/docs/source/en/package_reference/mixins.md index 42c253e710..c725306efe 100644 --- a/docs/source/en/package_reference/mixins.md +++ b/docs/source/en/package_reference/mixins.md @@ -21,16 +21,6 @@ how to integrate any ML framework with the Hub. [[autodoc]] PyTorchModelHubMixin -### Keras - -[[autodoc]] KerasModelHubMixin - -[[autodoc]] from_pretrained_keras - -[[autodoc]] push_to_hub_keras - -[[autodoc]] save_pretrained_keras - ### Fastai [[autodoc]] from_pretrained_fastai diff --git a/docs/source/en/package_reference/repository.md b/docs/source/en/package_reference/repository.md deleted file mode 100644 index de7851d6a9..0000000000 --- a/docs/source/en/package_reference/repository.md +++ /dev/null @@ -1,51 +0,0 @@ - - -# Managing local and online repositories - -The `Repository` class is a helper class that wraps `git` and `git-lfs` commands. It provides tooling adapted -for managing repositories which can be very large. - -It is the recommended tool as soon as any `git` operation is involved, or when collaboration will be a point -of focus with the repository itself. - -## The Repository class - -[[autodoc]] Repository - - __init__ - - current_branch - - all - -## Helper methods - -[[autodoc]] huggingface_hub.repository.is_git_repo - -[[autodoc]] huggingface_hub.repository.is_local_clone - -[[autodoc]] huggingface_hub.repository.is_tracked_with_lfs - -[[autodoc]] huggingface_hub.repository.is_git_ignored - -[[autodoc]] huggingface_hub.repository.files_to_be_staged - -[[autodoc]] huggingface_hub.repository.is_tracked_upstream - -[[autodoc]] huggingface_hub.repository.commits_to_push - -## Following asynchronous commands - -The `Repository` utility offers several methods which can be launched asynchronously: -- `git_push` -- `git_pull` -- `push_to_hub` -- The `commit` context manager - -See below for utilities to manage such asynchronous methods. 
- -[[autodoc]] Repository - - commands_failed - - commands_in_progress - - wait_for_commands - -[[autodoc]] huggingface_hub.repository.CommandInProgress diff --git a/docs/source/en/package_reference/serialization.md b/docs/source/en/package_reference/serialization.md index 0022c72aed..dab6b5d771 100644 --- a/docs/source/en/package_reference/serialization.md +++ b/docs/source/en/package_reference/serialization.md @@ -128,11 +128,7 @@ If you want to save a state dictionary (e.g. a mapping between layer names and r [[autodoc]] huggingface_hub.save_torch_state_dict -The `serialization` module also contains low-level helpers to split a state dictionary into several shards, while creating a proper index in the process. These helpers are available for `torch` and `tensorflow` tensors and are designed to be easily extended to any other ML frameworks. - -### split_tf_state_dict_into_shards - -[[autodoc]] huggingface_hub.split_tf_state_dict_into_shards +The `serialization` module also contains low-level helpers to split a state dictionary into several shards, while creating a proper index in the process. These helpers are available for `torch` tensors and are designed to be easily extended to any other ML frameworks. ### split_torch_state_dict_into_shards @@ -156,7 +152,6 @@ The loading helpers support both single-file and sharded checkpoints in either s [[autodoc]] huggingface_hub.load_state_dict_from_file - ## Tensors helpers ### get_torch_storage_id diff --git a/docs/source/en/package_reference/utilities.md b/docs/source/en/package_reference/utilities.md index 80fe3148ff..2b66c260d1 100644 --- a/docs/source/en/package_reference/utilities.md +++ b/docs/source/en/package_reference/utilities.md @@ -120,23 +120,40 @@ You can also enable or disable progress bars for specific groups. This allows yo [[autodoc]] huggingface_hub.utils.enable_progress_bars -## Configure HTTP backend +## Configuring the HTTP Backend -In some environments, you might want to configure how HTTP calls are made, for example if you are using a proxy. -`huggingface_hub` let you configure this globally using [`configure_http_backend`]. All requests made to the Hub will -then use your settings. Under the hood, `huggingface_hub` uses `requests.Session` so you might want to refer to the -[`requests` documentation](https://requests.readthedocs.io/en/latest/user/advanced) to learn more about the available -parameters. + -Since `requests.Session` is not guaranteed to be thread-safe, `huggingface_hub` creates one session instance per thread. -Using sessions allows us to keep the connection open between HTTP calls and ultimately save time. If you are -integrating `huggingface_hub` in a third-party library and wants to make a custom call to the Hub, use [`get_session`] -to get a Session configured by your users (i.e. replace any `requests.get(...)` call by `get_session().get(...)`). +In `huggingface_hub` v0.x, HTTP requests were handled with `requests`, and configuration was done via `configure_http_backend`. Since we now use `httpx`, configuration works differently: you must provide a factory function that takes no arguments and returns an `httpx.Client`. You can review the [default implementation here](https://github.com/huggingface/huggingface_hub/blob/v1.0-release/src/huggingface_hub/utils/_http.py) to see which parameters are used by default. -[[autodoc]] configure_http_backend + + + +In some setups, you may need to control how HTTP requests are made, for example when working behind a proxy. 
The `huggingface_hub` library allows you to configure this globally with [`set_client_factory`]. After configuration, all requests to the Hub will use your custom settings. Since `huggingface_hub` relies on `httpx.Client` under the hood, you can check the [`httpx` documentation](https://www.python-httpx.org/advanced/clients/) for details on available parameters. + +If you are building a third-party library and need to make direct requests to the Hub, use [`get_session`] to obtain a correctly configured `httpx` client. Replace any direct `httpx.get(...)` calls with `get_session().get(...)` to ensure proper behavior. + +[[autodoc]] set_client_factory [[autodoc]] get_session +In rare cases, you may want to manually close the current session (for example, after a transient `SSLError`). You can do this with [`close_session`]. A new session will automatically be created on the next call to [`get_session`]. + +Sessions are always closed automatically when the process exits. + +[[autodoc]] close_session + +For async code, use [`set_async_client_factory`] to configure an `httpx.AsyncClient` and [`get_async_session`] to retrieve one. + +[[autodoc]] set_async_client_factory + +[[autodoc]] get_async_session + + + +Unlike the synchronous client, the lifecycle of the async client is not managed automatically. Use an async context manager to handle it properly. + + ## Handle HTTP errors @@ -177,35 +194,39 @@ Here is a list of HTTP errors thrown in `huggingface_hub`. the server response and format the error message to provide as much information to the user as possible. -[[autodoc]] huggingface_hub.utils.HfHubHTTPError +[[autodoc]] huggingface_hub.errors.HfHubHTTPError #### RepositoryNotFoundError -[[autodoc]] huggingface_hub.utils.RepositoryNotFoundError +[[autodoc]] huggingface_hub.errors.RepositoryNotFoundError #### GatedRepoError -[[autodoc]] huggingface_hub.utils.GatedRepoError +[[autodoc]] huggingface_hub.errors.GatedRepoError #### RevisionNotFoundError -[[autodoc]] huggingface_hub.utils.RevisionNotFoundError +[[autodoc]] huggingface_hub.errors.RevisionNotFoundError + +#### BadRequestError + +[[autodoc]] huggingface_hub.errors.BadRequestError #### EntryNotFoundError -[[autodoc]] huggingface_hub.utils.EntryNotFoundError +[[autodoc]] huggingface_hub.errors.EntryNotFoundError -#### BadRequestError +#### RemoteEntryNotFoundError -[[autodoc]] huggingface_hub.utils.BadRequestError +[[autodoc]] huggingface_hub.errors.RemoteEntryNotFoundError #### LocalEntryNotFoundError -[[autodoc]] huggingface_hub.utils.LocalEntryNotFoundError +[[autodoc]] huggingface_hub.errors.LocalEntryNotFoundError #### OfflineModeIsEnabled -[[autodoc]] huggingface_hub.utils.OfflineModeIsEnabled +[[autodoc]] huggingface_hub.errors.OfflineModeIsEnabled ## Telemetry @@ -251,20 +272,6 @@ huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in rep >>> my_cool_method(repo_id="other..repo..id") huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'. - ->>> @validate_hf_hub_args -... def my_cool_auth_method(token: str): -... print(token) - ->>> my_cool_auth_method(token="a token") -"a token" - ->>> my_cool_auth_method(use_auth_token="a use_auth_token") -"a use_auth_token" - ->>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token") -UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token` value will be ignored. -"a token" ``` #### validate_hf_hub_args @@ -284,8 +291,8 @@ validated. 
[[autodoc]] utils.validate_repo_id -#### smoothly_deprecate_use_auth_token +#### smoothly_deprecate_legacy_arguments Not exactly a validator, but ran as well. -[[autodoc]] utils.smoothly_deprecate_use_auth_token +[[autodoc]] utils._validators.smoothly_deprecate_legacy_arguments diff --git a/docs/source/fr/_toctree.yml b/docs/source/fr/_toctree.yml index f6c76ff6f5..d9ed776e0a 100644 --- a/docs/source/fr/_toctree.yml +++ b/docs/source/fr/_toctree.yml @@ -6,10 +6,6 @@ title: Démarrage rapide - local: installation title: Installation -- title: "Concepts" - sections: - - local: concepts/git_vs_http - title: Git ou HTTP? - title: "Guides" sections: - local: guides/integrations diff --git a/docs/source/fr/concepts/git_vs_http.md b/docs/source/fr/concepts/git_vs_http.md deleted file mode 100644 index 678119d48f..0000000000 --- a/docs/source/fr/concepts/git_vs_http.md +++ /dev/null @@ -1,64 +0,0 @@ - - -# Git ou HTTP? - -`huggingface_hub` est une librairie qui permet d'interagir avec le Hugging Face Hub, -qui est une collection de dépots Git (modèles, datasets ou spaces). -Il y a deux manières principales pour accéder au Hub en utilisant `huggingface_hub`. - -La première approche, basée sur Git, appelée approche "git-based", est rendue possible par la classe [`Repository`]. -Cette méthode utilise un wrapper autour de la commande `git` avec des fonctionnalités supplémentaires conçues pour interagir avec le Hub. La deuxième option, appelée approche "HTTP-based" , consiste à faire des requêtes HTTP en utilisant le client [`HfApi`]. Examinons -les avantages et les inconvénients de ces deux méthodes. - -## Repository: l'approche historique basée sur git - -Initialement, `huggingface_hub` était principalement construite autour de la classe [`Repository`]. Elle fournit des -wrappers Python pour les commandes `git` usuelles, telles que `"git add"`, `"git commit"`, `"git push"`, -`"git tag"`, `"git checkout"`, etc. - -Cette librairie permet aussi de gérer l'authentification et les fichiers volumineux, souvent présents dans les dépôts Git de machine learning. De plus, ses méthodes sont exécutables en arrière-plan, ce qui est utile pour upload des données durant l'entrainement d'un modèle. - -L'avantage principal de l'approche [`Repository`] est qu'elle permet de garder une -copie en local du dépot Git sur votre machine. Cela peut aussi devenir un désavantage, -car cette copie locale doit être mise à jour et maintenue constamment. C'est une méthode -analogue au développement de logiciel classique où chaque développeur maintient sa propre copie locale -et push ses changements lorsqu'il travaille sur une nouvelle fonctionnalité. -Toutefois, dans le contexte du machine learning la taille des fichiers rend peu pertinente cette approche car -les utilisateurs ont parfois besoin d'avoir -uniquement les poids des modèles pour l'inférence ou de convertir ces poids d'un format à un autre sans avoir à cloner -tout le dépôt. - -> [!WARNING] -> [`Repository`] est maintenant obsolète et remplacée par les alternatives basées sur des requêtes HTTP. Étant donné son adoption massive par les utilisateurs, -> la suppression complète de [`Repository`] ne sera faite que pour la version `v1.0`. - -## HfApi: Un client HTTP plus flexible - -La classe [`HfApi`] a été développée afin de fournir une alternative aux dépôts git locaux, -qui peuvent être encombrant à maintenir, en particulier pour des modèles ou datasets volumineux. 
-La classe [`HfApi`] offre les mêmes fonctionnalités que les approches basées sur Git, -telles que le téléchargement et le push de fichiers ainsi que la création de branches et de tags, mais sans -avoir besoin d'un fichier local qui doit être constamment synchronisé. - -En plus des fonctionnalités déjà fournies par `git`, La classe [`HfApi`] offre des fonctionnalités -additionnelles, telles que la capacité à gérer des dépôts, le téléchargement des fichiers -dans le cache (permettant une réutilisation), la recherche dans le Hub pour trouver -des dépôts et des métadonnées, l'accès aux fonctionnalités communautaires telles que, les discussions, -les pull requests et les commentaires. - -## Quelle méthode utiliser et quand ? - -En général, **l'approche HTTP est la méthode recommandée** pour utiliser `huggingface_hub` -[`HfApi`] permet de pull et push des changements, de travailler avec les pull requests, les tags et les branches, l'interaction avec les discussions -et bien plus encore. Depuis la version `0.16`, les méthodes HTTP-based peuvent aussi être exécutées en arrière-plan, ce qui constituait le -dernier gros avantage de la classe [`Repository`]. - -Toutefois, certaines commandes restent indisponibles en utilisant [`HfApi`]. -Peut être que certaines ne le seront jamais, mais nous essayons toujours de réduire le fossé entre ces deux approches. -Si votre cas d'usage n'est pas couvert, nous serions ravis de vous aider. Pour cela, ouvrez -[une issue sur Github](https://github.com/huggingface/huggingface_hub)! Nous écoutons tous les retours nous permettant de construire -l'écosystème 🤗 avec les utilisateurs et pour les utilisateurs. - -Cette préférence pour l'approche basée sur [`HfApi`] plutôt que [`Repository`] ne signifie pas que les dépôts stopperons d'être versionnés avec git sur le Hugging Face Hub. Il sera toujours possible d'utiliser les commandes `git` en local lorsque nécessaire. \ No newline at end of file diff --git a/docs/source/fr/guides/integrations.md b/docs/source/fr/guides/integrations.md index 5a9736667f..20dff4a73f 100644 --- a/docs/source/fr/guides/integrations.md +++ b/docs/source/fr/guides/integrations.md @@ -223,8 +223,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # argument supplémentaire @@ -242,8 +240,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -266,9 +262,9 @@ est ici pour vous donner des indications et des idées sur comment gérer l'int n'hésitez pas à nous contacter si vous avez une question ! -| Intégration | Utilisant des helpers | Utilisant [`ModelHubMixin`] | -|:---:|:---:|:---:| -| Expérience utilisateur | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| Flexible | Très flexible.
Vous controllez complètement l'implémentation. | Moins flexible.
Votre framework doit avoir une classe de modèle. | -| Maintenance | Plus de maintenance pour ajouter du support pour la configuration, et de nouvelles fonctionnalités. Peut aussi nécessiter de fixx des problèmes signalés par les utilisateurs.| Moins de maintenance vu que la plupart des intégrations avec le Hub sont implémentés dans `huggingface_hub` | -| Documentation / Anotation de type| A écrire à la main | Géré partiellement par `huggingface_hub`. | +| Intégration | Utilisant des helpers | Utilisant [`ModelHubMixin`] | +| :-------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | +| Expérience utilisateur | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| Flexible | Très flexible.
Vous contrôlez complètement l'implémentation. | Moins flexible.
Votre framework doit avoir une classe de modèle. | +| Maintenance | Plus de maintenance pour ajouter du support pour la configuration, et de nouvelles fonctionnalités. Peut aussi nécessiter de corriger des problèmes signalés par les utilisateurs. | Moins de maintenance vu que la plupart des intégrations avec le Hub sont implémentées dans `huggingface_hub` | +| Documentation / Annotation de type | À écrire à la main | Géré partiellement par `huggingface_hub`. | diff --git a/docs/source/fr/installation.md b/docs/source/fr/installation.md index eb4b2ee9b4..fe3a279102 100644 --- a/docs/source/fr/installation.md +++ b/docs/source/fr/installation.md @@ -7,7 +7,7 @@ rendered properly in your Markdown viewer. Avant de commencer, vous allez avoir besoin de préparer votre environnement en installant les packages appropriés. -`huggingface_hub` est testée sur **Python 3.8+**. +`huggingface_hub` est testée sur **Python 3.9+**. ## Installation avec pip @@ -48,17 +48,13 @@ Toutefois, certaines fonctionnalités de `huggingface_hub` ne seront pas disponi Vous pouvez installer des dépendances optionnelles via `pip`: ```bash -#Installation des dépendances pour les fonctionnalités spécifiques à Tensorflow. -#/!\ Attention : cette commande n'est pas équivalente à `pip install tensorflow`. -pip install 'huggingface_hub[tensorflow]' - #Installation des dépendances spécifiques à Pytorch et au CLI. pip install 'huggingface_hub[cli,torch]' ``` Voici une liste des dépendances optionnelles dans `huggingface_hub`: - `cli` fournit une interface d'invite de commande plus pratique pour `huggingface_hub`. -- `fastai`, `torch` et `tensorflow` sont des dépendances pour utiliser des fonctionnalités spécifiques à un framework. +- `fastai` et `torch` sont des dépendances pour utiliser des fonctionnalités spécifiques à un framework. - `dev` permet de contribuer à la librairie. Cette dépendance inclut `testing` (pour lancer des tests), `typing` (pour lancer le vérifieur de type) et `quality` (pour lancer des linters).
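To ground the [`ModelHubMixin`] column of the integration comparison above, here is a minimal sketch of the mixin-based route using `PyTorchModelHubMixin`, as shown earlier in that guide. The model class is a toy and the repo id is a placeholder:

```python
import torch

from huggingface_hub import PyTorchModelHubMixin


class MyModel(torch.nn.Module, PyTorchModelHubMixin):
    # Inheriting the mixin adds from_pretrained() / push_to_hub() to the class;
    # simple __init__ kwargs like hidden_size are serialized into the config.
    def __init__(self, hidden_size: int = 16):
        super().__init__()
        self.linear = torch.nn.Linear(hidden_size, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)


model = MyModel(hidden_size=32)
model.push_to_hub("username/my-test-model")  # placeholder repo id
reloaded = MyModel.from_pretrained("username/my-test-model")
```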
diff --git a/docs/source/hi/_toctree.yml b/docs/source/hi/_toctree.yml index 5b9e412c50..f8b3606536 100644 --- a/docs/source/hi/_toctree.yml +++ b/docs/source/hi/_toctree.yml @@ -6,7 +6,3 @@ title: जल्दी शुरू - local: installation title: इंस्टालेशन -- title: "संकल्पना मार्गदर्शिकाएँ" - sections: - - local: concepts/git_vs_http - title: "संकल्पनाएँ/गिट_बनाम_एचटीटीपी" diff --git a/docs/source/hi/concepts/git_vs_http.md b/docs/source/hi/concepts/git_vs_http.md deleted file mode 100644 index 8a4e2a625a..0000000000 --- a/docs/source/hi/concepts/git_vs_http.md +++ /dev/null @@ -1,30 +0,0 @@ -# Git vs HTTP पैराडाइम - -`huggingface_hub` लाइब्रेरी Hugging Face Hub के साथ आदान-प्रदान करने के लिए एक लाइब्रेरी है, जो git-आधारित repositories (models, datasets या Spaces) का एक संग्रह है। `huggingface_hub` का उपयोग करके Hub तक पहुंचने के दो मुख्य तरीके हैं। - -पहला तरीका, जिसे "git-आधारित" तरीका कहा जाता है, [`Repository`] क्लास द्वारा संचालित है। यह विधि `git` कमांड के चारों ओर एक आवरण का उपयोग करती है जिसमें Hub के साथ आदान-प्रदान करने के लिए विशेष रूप से डिज़ाइन किए गए अतिरिक्त functions हैं। दूसरा विकल्प, जिसे "HTTP-आधारित" तरीका कहा जाता है, [`HfApi`] client का उपयोग करके HTTP requests बनाने में शामिल है। आइए प्रत्येक तरीका के फायदे और नुकसान की जांच करते हैं। - -## Repository: ऐतिहासिक git-आधारित तरीका - -शुरुआत में, `huggingface_hub` मुख्य रूप से [`Repository`] क्लास के चारों ओर बनाया गया था। यह सामान्य `git` कमांड जैसे `"git add"`, `"git commit"`, `"git push"`, `"git tag"`, `"git checkout"`, आदि के लिए Python wrappers प्रदान करता है। - -लाइब्रेरी विवरण सेट करने और बड़ी फाइलों को track करने में भी मदद करती है, जो अक्सर machine learning repositories में उपयोग की जाती हैं। इसके अतिरिक्त, लाइब्रेरी आपको अपनी विधियों को पृष्ठभूमि में कार्यान्वित करने की अनुमति देती है, जो training के दौरान डेटा अपलोड करने के लिए उपयोगी है। - -[`Repository`] का उपयोग करने का मुख्य फायदा यह है कि यह आपको अपनी मशीन पर संपूर्ण repository की एक local copy बनाए रखने की अनुमति देता है। यह एक नुकसान भी हो सकता है क्योंकि इसके लिए आपको इस local copy को लगातार update और maintain करना होता है। यह पारंपरिक software development के समान है जहां प्रत्येक developer अपनी स्वयं की local copy maintain करता है और feature पर काम करते समय changes push करता है। हालांकि, machine learning के संदर्भ में, यह हमेशा आवश्यक नहीं हो सकता क्योंकि users को केवल inference के लिए weights download करने या weights को एक format से दूसरे में convert करने की आवश्यकता हो सकती है, बिना पूरी repository को clone करने की आवश्यकता के। - -> [!WARNING] -> [`Repository`] अब http-आधारित विकल्पों के पक्ष में deprecated है। legacy code में इसकी बड़ी अपनाई जाने के कारण, [`Repository`] का पूर्ण removal केवल `v1.0` release में होगा। - -## HfApi: एक लचीला और सुविधाजनक HTTP client - -[`HfApi`] क्लास को local git repositories का एक विकल्प प्रदान करने के लिए विकसित किया गया था, जो maintain करना मुश्किल हो सकता है, विशेष रूप से बड़े models या datasets के साथ व्यवहार करते समय। [`HfApi`] क्लास git-आधारित तरीकाों की समान functionality प्रदान करती है, जैसे files download और push करना और branches तथा tags बनाना, लेकिन एक local folder की आवश्यकता के बिना जिसे sync में रखना पड़ता है। - -`git` द्वारा पहले से प्रदान की गई functionalities के अलावा, [`HfApi`] क्लास अतिरिक्त features प्रदान करती है, जैसे repos manage करने की क्षमता, efficient reuse के लिए caching का उपयोग करके files download करना, repos और metadata के लिए Hub को search करना, discussions, PRs, और comments जैसी community features तक पहुंच, और Spaces hardware और secrets को configure करना। - -## मुझे क्या उपयोग करना चाहिए? 
और कब? - -कुल मिलाकर, **HTTP-आधारित तरीका सभी cases में** `huggingface_hub` का उपयोग करने का **अनुशंसित तरीका है**। [`HfApi`] changes को pull और push करने, PRs, tags और branches के साथ काम करने, discussions के साथ interact करने और बहुत कुछ करने की अनुमति देता है। `0.16` release के बाद से, http-आधारित methods भी पृष्ठभूमि में चल सकती हैं, जो [`Repository`] क्लास का अंतिम प्रमुख फायदा था। - -हालांकि, सभी git commands [`HfApi`] के माध्यम से उपलब्ध नहीं हैं। कुछ को कभी भी implement नहीं किया जा सकता है, लेकिन हम हमेशा सुधार करने और gap को बंद करने की कोशिश कर रहे हैं। यदि आपको अपना use case covered नहीं दिखता है, तो कृपया [Github पर एक issue खोलें](https://github.com/huggingface/huggingface_hub)! हम अपने users के साथ और उनके लिए 🤗 ecosystem बनाने में मदद करने के लिए feedback का स्वागत करते हैं। - -git-आधारित [`Repository`] पर http-आधारित [`HfApi`] की यह प्राथमिकता का मतलब यह नहीं है कि git versioning Hugging Face Hub से जल्द ही गायब हो जाएगी। workflows में जहां यह समझ में आता है, वहां `git` commands का locally उपयोग करना हमेशा संभव होगा। \ No newline at end of file diff --git a/docs/source/hi/installation.md b/docs/source/hi/installation.md index 1659e85fd7..91d3702059 100644 --- a/docs/source/hi/installation.md +++ b/docs/source/hi/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. आरंभ करने से पहले, आपको उपयुक्त पैकेज स्थापित करके अपना परिवेश सेटअप करना होगा। -`huggingface_hub` का परीक्षण **Python 3.8+** पर किया गया है। +`huggingface_hub` का परीक्षण **Python 3.9+** पर किया गया है। ## पिप के साथ स्थापित करें @@ -46,17 +46,13 @@ pip install --upgrade huggingface_hub आप `pip` के माध्यम से वैकल्पिक निर्भरताएँ स्थापित कर सकते हैं: ```bash -# Install dependencies for tensorflow-specific features -# /!\ Warning: this is not equivalent to `pip install tensorflow` -pip install 'huggingface_hub[tensorflow]' - # Install dependencies for both torch-specific and CLI-specific features. 
pip install 'huggingface_hub[cli,torch]' ``` यहां `huggingface_hub` में वैकल्पिक निर्भरताओं की सूची दी गई है: - `cli`: `huggingface_hub` के लिए अधिक सुविधाजनक CLI इंटरफ़ेस प्रदान करें। -- `fastai`, `torch`, `tensorflow`: फ्रेमवर्क-विशिष्ट सुविधाओं को चलाने के लिए निर्भरताएँ। +- `fastai`, `torch`: फ्रेमवर्क-विशिष्ट सुविधाओं को चलाने के लिए निर्भरताएँ। - `dev`: lib में योगदान करने के लिए निर्भरताएँ। इसमें 'परीक्षण' (परीक्षण चलाने के लिए), 'टाइपिंग' (टाइप चेकर चलाने के लिए) और 'गुणवत्ता' (लिंटर चलाने के लिए) शामिल हैं। diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml index 2c7a4da702..e67d69af38 100644 --- a/docs/source/ko/_toctree.yml +++ b/docs/source/ko/_toctree.yml @@ -18,8 +18,6 @@ title: 명령줄 인터페이스(CLI) 사용하기 - local: guides/hf_file_system title: Hf파일시스템 - - local: guides/repository - title: 리포지토리 - local: guides/search title: Hub에서 검색하기 - local: guides/inference @@ -40,10 +38,6 @@ title: 라이브러리 통합 - local: guides/webhooks_server title: 웹훅 서버 -- title: "개념 가이드" - sections: - - local: concepts/git_vs_http - title: Git 대 HTTP 패러다임 - title: "라이브러리 레퍼런스" sections: - local: package_reference/overview @@ -52,8 +46,6 @@ title: 로그인 및 로그아웃 - local: package_reference/environment_variables title: 환경 변수 - - local: package_reference/repository - title: 로컬 및 온라인 리포지토리 관리 - local: package_reference/hf_api title: 허깅페이스 Hub API - local: package_reference/file_download diff --git a/docs/source/ko/concepts/git_vs_http.md b/docs/source/ko/concepts/git_vs_http.md deleted file mode 100644 index c9812cb0f8..0000000000 --- a/docs/source/ko/concepts/git_vs_http.md +++ /dev/null @@ -1,50 +0,0 @@ - - -# Git 대 HTTP 패러다임 - -`huggingface_hub` 라이브러리는 git 기반의 저장소(Models, Datasets 또는 Spaces)로 구성된 Hugging Face Hub과 상호 작용하기 위한 라이브러리입니다. -`huggingface_hub`를 사용하여 Hub에 접근하는 방법은 크게 두 가지입니다. - -첫 번째 접근 방식인 소위 "git 기반" 접근 방식은 [`Repository`] 클래스가 주도합니다. -이 방법은 허브와 상호 작용하도록 특별히 설계된 추가 기능이 있는 `git` 명령에 랩퍼를 사용합니다. -두 번째 방법은 "HTTP 기반" 접근 방식이며, [`HfApi`] 클라이언트를 사용하여 HTTP 요청을 수행합니다. -각 방법의 장단점을 살펴보겠습니다. - -## Repository: 역사적인 Git 기반 접근 방식 - -먼저, `huggingface_hub`는 주로 [`Repository`] 클래스를 기반으로 구축되었습니다. -이 클래스는 `"git add"`, `"git commit"`, `"git push"`, `"git tag"`, `"git checkout"` 등과 같은 일반적인 `git` 명령에 대한 Python 랩퍼를 제공합니다. - -이 라이브러리는 머신러닝 저장소에서 자주 사용되는 큰 파일을 추적하고 자격 증명을 설정하는 데 도움이 됩니다. -또한, 이 라이브러리는 백그라운드에서 메소드를 실행할 수 있어, 훈련 중에 데이터를 업로드할 때 유용합니다. - -로컬 머신에 전체 저장소의 로컬 복사본을 유지할 수 있다는 것은 [`Repository`]를 사용하는 가장 큰 장점입니다. -하지만 동시에 로컬 복사본을 지속적으로 업데이트하고 유지해야 한다는 단점이 될 수도 있습니다. -이는 각 개발자가 자체 로컬 복사본을 유지하고 기능을 개발할 때 변경 사항을 push하는 전통적인 소프트웨어 개발과 유사합니다. -그러나 머신러닝의 경우, 사용자가 전체 저장소를 복제할 필요 없이 추론을 위해 가중치만 다운로드하거나 가중치를 한 형식에서 다른 형식으로 변환하기만 하면 되기 때문에 이런 방식이 항상 필요한 것은 아닙니다. - -> [!WARNING] -> [`Repository`]는 지원이 중단될 예정이므로 HTTP 기반 대안을 사용하는 것을 권장합니다. 기존 코드에서 널리 사용되기 때문에 [`Repository`]의 완전한 제거는 릴리스 `v1.0`에서 이루어질 예정입니다. - -## HfApi: 유연하고 편리한 HTTP 클라이언트 - -[`HfApi`] 클래스는 특히 큰 모델이나 데이터셋을 처리할 때 유지하기 어려운 로컬 git 저장소의 대안으로 개발되었습니다. -[`HfApi`] 클래스는 파일 다운로드 및 push, 브랜치 및 태그 생성과 같은 git 기반 접근 방식과 동일한 기능을 제공하지만, 동기화 상태를 유지해야 하는 로컬 폴더가 필요하지 않습니다. - -[`HfApi`] 클래스는 `git`이 제공하는 기능 외에도 추가적인 기능을 제공합니다. -저장소를 관리하고, 효율적인 재사용을 위해 캐싱을 사용하여 파일을 다운로드하고, Hub에서 저장소 및 메타데이터를 검색하고, 토론, PR 및 코멘트와 같은 커뮤니티 기능에 접근하고, Spaces 하드웨어 및 시크릿을 구성할 수 있습니다. - -## 무엇을 사용해야 하나요? 언제 사용하나요? - -전반적으로, **HTTP 기반 접근 방식은 모든 경우에** `huggingface_hub`를 사용하는 것이 좋습니다. -[`HfApi`]를 사용하면 변경 사항을 pull하고 push하고, PR, 태그 및 브랜치로 작업하고, 토론과 상호 작용하는 등의 작업을 할 수 있습니다. -`0.16` 릴리스부터는 [`Repository`] 클래스의 마지막 주요 장점이었던 http 기반 메소드도 백그라운드에서 실행할 수 있습니다. - -그러나 모든 git 명령이 [`HfApi`]를 통해 사용 가능한 것은 아닙니다. 
일부는 구현되지 않을 수도 있지만, 저희는 항상 개선하고 격차를 줄이기 위해 노력하고 있습니다. -사용 사례에 해당되지 않는 경우, [Github에서 이슈](https://github.com/huggingface/huggingface_hub)를 개설해 주세요! -사용자와 함께, 사용자를 위한 🤗 생태계를 구축하는 데 도움이 되는 피드백을 환영합니다. - -git 기반 [`Repository`]보다 http 기반 [`HfApi`]를 선호한다고 해서 Hugging Face Hub에서 git 버전 관리가 바로 사라지는 것은 아닙니다. -워크플로우 상 합당하다면 언제든 로컬에서 `git` 명령을 사용할 수 있습니다. diff --git a/docs/source/ko/guides/cli.md b/docs/source/ko/guides/cli.md index a8096f948b..af88b1ac99 100644 --- a/docs/source/ko/guides/cli.md +++ b/docs/source/ko/guides/cli.md @@ -448,7 +448,6 @@ Copy-and-paste the text below in your GitHub issue. - Who am I ?: Wauplin - Configured git credential helpers: store - FastAI: N/A -- Tensorflow: 2.11.0 - Torch: 1.12.1 - Jinja2: 3.1.2 - Graphviz: 0.20.1 diff --git a/docs/source/ko/guides/inference.md b/docs/source/ko/guides/inference.md index 55a89a7801..7444e2b306 100644 --- a/docs/source/ko/guides/inference.md +++ b/docs/source/ko/guides/inference.md @@ -8,7 +8,6 @@ rendered properly in your Markdown viewer. - [추론 API](https://huggingface.co/docs/api-inference/index): Hugging Face의 인프라에서 가속화된 추론을 실행할 수 있는 서비스로 무료로 제공됩니다. 이 서비스는 추론을 시작하고 다양한 모델을 테스트하며 AI 제품의 프로토타입을 만드는 빠른 방법입니다. - [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index): 모델을 제품 환경에 쉽게 배포할 수 있는 제품입니다. 사용자가 선택한 클라우드 환경에서 완전 관리되는 전용 인프라에서 Hugging Face를 통해 추론이 실행됩니다. -이러한 서비스들은 [`InferenceClient`] 객체를 사용하여 호출할 수 있습니다. 이는 이전의 [`InferenceApi`] 클라이언트를 대체하는 역할을 하며, 작업에 대한 특별한 지원을 추가하고 [추론 API](https://huggingface.co/docs/api-inference/index) 및 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index)에서 추론 작업을 처리합니다. 새 클라이언트로의 마이그레이션에 대한 자세한 내용은 [레거시 InferenceAPI 클라이언트](#legacy-inferenceapi-client) 섹션을 참조하세요. > [!TIP] > [`InferenceClient`]는 API에 HTTP 호출을 수행하는 Python 클라이언트입니다. HTTP 호출을 원하는 툴을 이용하여 직접 사용하려면 (curl, postman 등) [추론 API](https://huggingface.co/docs/api-inference/index) 또는 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index) 문서 페이지를 참조하세요. @@ -77,35 +76,35 @@ text-to-image 작업을 시작해보겠습니다. [`InferenceClient`]의 목표는 Hugging Face 모델에서 추론을 실행하기 위한 가장 쉬운 인터페이스를 제공하는 것입니다. 이는 가장 일반적인 작업들을 지원하는 간단한 API를 가지고 있습니다. 
현재 지원되는 작업 목록은 다음과 같습니다: -| 도메인 | 작업 | 지원 여부 | 문서 | -|--------|--------------------------------|--------------|------------------------------------| -| 오디오 | [오디오 분류](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | -| 오디오 | [오디오 투 오디오](https://huggingface.co/tasks/audio-to-audio) | ✅ | [`~InferenceClient.audio_to_audio`] | -| | [자동 음성 인식](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | -| | [텍스트 투 스피치](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | -| 컴퓨터 비전 | [이미지 분류](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | -| | [이미지 분할](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | -| | [이미지 투 이미지](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | -| | [이미지 투 텍스트](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | -| | [객체 탐지](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | -| | [텍스트 투 이미지](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | -| | [제로샷 이미지 분류](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | -| 멀티모달 | [문서 질의 응답](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | -| | [시각적 질의 응답](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | -| 자연어 처리 | [대화형](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | -| | [특성 추출](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | -| | [마스크 채우기](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | -| | [질의 응답](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | -| | [문장 유사도](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | -| | [요약](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | -| | [테이블 질의 응답](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | -| | [텍스트 분류](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | -| | [텍스트 생성](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | -| | [토큰 분류](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | -| | [번역](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | -| | [제로샷 분류](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | -| 타블로 | [타블로 작업 분류](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | -| | [타블로 회귀](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | +| 도메인 | 작업 | 지원 여부 | 문서 | +| ----------- | --------------------------------------------------------------------------------- | --------- | --------------------------------------------------- | +| 오디오 | [오디오 분류](https://huggingface.co/tasks/audio-classification) | ✅ | [`~InferenceClient.audio_classification`] | +| 오디오 | [오디오 투 
오디오](https://huggingface.co/tasks/audio-to-audio) | ✅ | [`~InferenceClient.audio_to_audio`] | +| | [자동 음성 인식](https://huggingface.co/tasks/automatic-speech-recognition) | ✅ | [`~InferenceClient.automatic_speech_recognition`] | +| | [텍스트 투 스피치](https://huggingface.co/tasks/text-to-speech) | ✅ | [`~InferenceClient.text_to_speech`] | +| 컴퓨터 비전 | [이미지 분류](https://huggingface.co/tasks/image-classification) | ✅ | [`~InferenceClient.image_classification`] | +| | [이미지 분할](https://huggingface.co/tasks/image-segmentation) | ✅ | [`~InferenceClient.image_segmentation`] | +| | [이미지 투 이미지](https://huggingface.co/tasks/image-to-image) | ✅ | [`~InferenceClient.image_to_image`] | +| | [이미지 투 텍스트](https://huggingface.co/tasks/image-to-text) | ✅ | [`~InferenceClient.image_to_text`] | +| | [객체 탐지](https://huggingface.co/tasks/object-detection) | ✅ | [`~InferenceClient.object_detection`] | +| | [텍스트 투 이미지](https://huggingface.co/tasks/text-to-image) | ✅ | [`~InferenceClient.text_to_image`] | +| | [제로샷 이미지 분류](https://huggingface.co/tasks/zero-shot-image-classification) | ✅ | [`~InferenceClient.zero_shot_image_classification`] | +| 멀티모달 | [문서 질의 응답](https://huggingface.co/tasks/document-question-answering) | ✅ | [`~InferenceClient.document_question_answering`] | +| | [시각적 질의 응답](https://huggingface.co/tasks/visual-question-answering) | ✅ | [`~InferenceClient.visual_question_answering`] | +| 자연어 처리 | [대화형](https://huggingface.co/tasks/conversational) | ✅ | [`~InferenceClient.conversational`] | +| | [특성 추출](https://huggingface.co/tasks/feature-extraction) | ✅ | [`~InferenceClient.feature_extraction`] | +| | [마스크 채우기](https://huggingface.co/tasks/fill-mask) | ✅ | [`~InferenceClient.fill_mask`] | +| | [질의 응답](https://huggingface.co/tasks/question-answering) | ✅ | [`~InferenceClient.question_answering`] | +| | [문장 유사도](https://huggingface.co/tasks/sentence-similarity) | ✅ | [`~InferenceClient.sentence_similarity`] | +| | [요약](https://huggingface.co/tasks/summarization) | ✅ | [`~InferenceClient.summarization`] | +| | [테이블 질의 응답](https://huggingface.co/tasks/table-question-answering) | ✅ | [`~InferenceClient.table_question_answering`] | +| | [텍스트 분류](https://huggingface.co/tasks/text-classification) | ✅ | [`~InferenceClient.text_classification`] | +| | [텍스트 생성](https://huggingface.co/tasks/text-generation) | ✅ | [`~InferenceClient.text_generation`] | +| | [토큰 분류](https://huggingface.co/tasks/token-classification) | ✅ | [`~InferenceClient.token_classification`] | +| | [번역](https://huggingface.co/tasks/translation) | ✅ | [`~InferenceClient.translation`] | +| | [제로샷 분류](https://huggingface.co/tasks/zero-shot-classification) | ✅ | [`~InferenceClient.zero_shot_classification`] | +| 타블로 | [타블로 작업 분류](https://huggingface.co/tasks/tabular-classification) | ✅ | [`~InferenceClient.tabular_classification`] | +| | [타블로 회귀](https://huggingface.co/tasks/tabular-regression) | ✅ | [`~InferenceClient.tabular_regression`] | > [!TIP] > 각 작업에 대해 더 자세히 알고 싶거나 사용 방법 및 각 작업에 대한 가장 인기 있는 모델을 알아보려면 [Tasks](https://huggingface.co/tasks) 페이지를 확인하세요. @@ -175,70 +174,3 @@ pip install --upgrade huggingface_hub[inference] >>> client.image_classification("https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg") [{'score': 0.9779096841812134, 'label': 'Blenheim spaniel'}, ...] ``` - -## 레거시 InferenceAPI 클라이언트[[legacy-inferenceapi-client]] - -[`InferenceClient`]는 레거시 [`InferenceApi`] 클라이언트를 대체하여 작동합니다. 
특정 작업에 대한 지원을 제공하고 [추론 API](https://huggingface.co/docs/api-inference/index) 및 [추론 엔드포인트](https://huggingface.co/docs/inference-endpoints/index)에서 추론을 처리합니다. - -아래는 [`InferenceApi`]에서 [`InferenceClient`]로 마이그레이션하는 데 도움이 되는 간단한 가이드입니다. - -### 초기화[[initialization]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased", token=API_TOKEN) -``` - -변경 후: - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient(model="bert-base-uncased", token=API_TOKEN) -``` - -### 특정 작업에서 실행하기[[run-on-a-specific-task]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="paraphrase-xlm-r-multilingual-v1", task="feature-extraction") ->>> inference(...) -``` - -변경 후: - -```python ->>> from huggingface_hub import InferenceClient ->>> inference = InferenceClient() ->>> inference.feature_extraction(..., model="paraphrase-xlm-r-multilingual-v1") -``` - -> [!TIP] -> 위의 방법은 코드를 [`InferenceClient`]에 맞게 조정하는 권장 방법입니다. 이렇게 하면 `feature_extraction`과 같이 작업에 특화된 메소드를 활용할 수 있습니다. - -### 사용자 정의 요청 실행[[run-custom-request]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="bert-base-uncased") ->>> inference(inputs="The goal of life is [MASK].") -[{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -### 매개변수와 함께 실행하기[[run-with-parameters]] - -변경 전: - -```python ->>> from huggingface_hub import InferenceApi ->>> inference = InferenceApi(repo_id="typeform/distilbert-base-uncased-mnli") ->>> inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" ->>> params = {"candidate_labels":["refund", "legal", "faq"]} ->>> inference(inputs, params) -{'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` diff --git a/docs/source/ko/guides/integrations.md b/docs/source/ko/guides/integrations.md index f0946bc298..b595a3c630 100644 --- a/docs/source/ko/guides/integrations.md +++ b/docs/source/ko/guides/integrations.md @@ -81,7 +81,7 @@ def push_to_hub(model: MyModelClass, repo_name: str) -> None: - `token`: 개인 리포지토리에서 다운로드하기 위한 토큰 - `revision`: 특정 브랜치에서 다운로드하기 위한 리비전 - `cache_dir`: 특정 디렉터리에 파일을 캐시하기 위한 디렉터리 -- `force_download`/`resume_download`/`local_files_only`: 캐시를 재사용할 것인지 여부를 결정하는 매개변수 +- `force_download`/`local_files_only`: 캐시를 재사용할 것인지 여부를 결정하는 매개변수 - `proxies`: HTTP 세션 구성 모델을 푸시할 때는 유사한 매개변수가 지원됩니다: @@ -211,8 +211,7 @@ class PyTorchModelHubMixin(ModelHubMixin): revision: str, cache_dir: str, force_download: bool, - proxies: Optional[Dict], - resume_download: bool, + proxies: Optional[dict], local_files_only: bool, token: Union[str, bool, None], map_location: str = "cpu", # 추가 인자 @@ -232,8 +231,6 @@ class PyTorchModelHubMixin(ModelHubMixin): revision=revision, cache_dir=cache_dir, force_download=force_download, - proxies=proxies, - resume_download=resume_download, token=token, local_files_only=local_files_only, ) @@ -393,11 +390,11 @@ class VoiceCraft( 두 가지 접근 방법에 대한 장단점을 간단히 정리해보겠습니다. 아래 표는 단순히 예시일 뿐입니다. 각자 다른 프레임워크에는 고려해야 할 특정 사항이 있을 수 있습니다. 이 가이드는 통합을 다루는 아이디어와 지침을 제공하기 위한 것입니다. 언제든지 궁금한 점이 있으면 문의해 주세요! 
-| 통합 | helpers 사용 시 | [`ModelHubMixin`] 사용 시 | -|:---:|:---:|:---:| -| 사용자 경험 | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | -| 유연성 | 매우 유연합니다.
구현을 완전히 제어합니다. | 유연성이 떨어집니다.
프레임워크에는 모델 클래스가 있어야 합니다. | -| 유지 관리 | 구성 및 새로운 기능에 대한 지원을 추가하기 위한 유지 관리가 더 필요합니다. 사용자가 보고한 문제를 해결해야할 수도 있습니다. | Hub와의 대부분의 상호 작용이 `huggingface_hub`에서 구현되므로 유지 관리가 줄어듭니다. | -| 문서화 / 타입 주석 | 수동으로 작성해야 합니다. | `huggingface_hub`에서 부분적으로 처리됩니다. | -| 다운로드 횟수 표시기 | 수동으로 처리해야 합니다. | 클래스에 `config` 속성이 있다면 기본적으로 활성화됩니다. | -| 모델 카드 | 수동으로 처리해야 합니다. | library_name, tags 등을 활용하여 기본적으로 생성됩니다. | +| 통합 | helpers 사용 시 | [`ModelHubMixin`] 사용 시 | +| :------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------: | +| 사용자 경험 | `model = load_from_hub(...)`
`push_to_hub(model, ...)` | `model = MyModel.from_pretrained(...)`
`model.push_to_hub(...)` | +| 유연성 | 매우 유연합니다.
구현을 완전히 제어합니다. | 유연성이 떨어집니다.
프레임워크에는 모델 클래스가 있어야 합니다. | +| 유지 관리 | 구성 및 새로운 기능에 대한 지원을 추가하기 위한 유지 관리가 더 필요합니다. 사용자가 보고한 문제를 해결해야할 수도 있습니다. | Hub와의 대부분의 상호 작용이 `huggingface_hub`에서 구현되므로 유지 관리가 줄어듭니다. | +| 문서화 / 타입 주석 | 수동으로 작성해야 합니다. | `huggingface_hub`에서 부분적으로 처리됩니다. | +| 다운로드 횟수 표시기 | 수동으로 처리해야 합니다. | 클래스에 `config` 속성이 있다면 기본적으로 활성화됩니다. | +| 모델 카드 | 수동으로 처리해야 합니다. | library_name, tags 등을 활용하여 기본적으로 생성됩니다. | diff --git a/docs/source/ko/guides/repository.md b/docs/source/ko/guides/repository.md deleted file mode 100644 index 343dec799a..0000000000 --- a/docs/source/ko/guides/repository.md +++ /dev/null @@ -1,217 +0,0 @@ - - -# 리포지토리 생성과 관리[[create-and-manage-a-repository]] - -Hugging Face Hub는 Git 리포지토리 모음입니다. [Git](https://git-scm.com/)은 협업을 할 때 여러 프로젝트 버전을 쉽게 관리하기 위해 널리 사용되는 소프트웨어 개발 도구입니다. 이 가이드에서는 Hub의 리포지토리 사용법인 다음 내용을 다룹니다: - -- 리포지토리 생성과 삭제. -- 태그 및 브랜치 관리. -- 리포지토리 이름 변경. -- 리포지토리 공개 여부. -- 리포지토리 복사본 관리. - -> [!WARNING] -> GitLab/GitHub/Bitbucket과 같은 플랫폼을 사용해 본 경험이 있다면, 모델 리포지토리를 관리하기 위해 `git` CLI를 사용해 git 리포지토리를 클론(`git clone`)하고 변경 사항을 커밋(`git add, git commit`)하고 커밋한 내용을 푸시(`git push`) 하는것이 가장 먼저 떠오를 것입니다. 이 명령어들은 Hugging Face Hub에서도 사용할 수 있습니다. 하지만 소프트웨어 엔지니어링과 머신러닝은 동일한 요구 사항과 워크플로우를 공유하지 않습니다. 모델 리포지토리는 다양한 프레임워크와 도구를 위한 대규모 모델 가중치 파일을 유지관리 할 수 있으므로, 리포지토리를 복제하면 대규모 로컬 폴더를 유지관리하고 막대한 크기의 파일을 다루게 될 수 있습니다. 결과적으로 Hugging Face의 커스텀 HTTP 방법을 사용하는 것이 더욱 효율적일 수 있습니다. 더 자세한 내용은 [Git vs HTTP paradigm](../concepts/git_vs_http) 문서를 참조하세요. - -Hub에 리포지토리를 생성하고 관리하려면, 로그인이 되어 있어야 합니다. 로그인이 안 되어있다면 [이 문서](../quick-start#authentication)를 참고해 주세요. 이 가이드에서는 로그인이 되어있다는 가정하에 진행됩니다. - -## 리포지토리 생성 및 삭제[[repo-creation-and-deletion]] - -첫 번째 단계는 어떻게 리포지토리를 생성하고 삭제하는지를 알아야 합니다. 사용자 이름 네임스페이스 아래에 소유한 리포지토리 또는 쓰기 권한이 있는 조직의 리포지토리만 관리할 수 있습니다. - -### 리포지토리 생성[[create-a-repository]] - -[`create_repo`] 함수로 함께 빈 리포지토리를 만들고 `repo_id` 매개변수를 사용하여 이름을 정하세요. `repo_id`는 사용자 이름 또는 조직 이름 뒤에 리포지토리 이름이 따라옵니다: `username_or_org/repo_name`. - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-model") -'https://huggingface.co/lysandre/test-model' -``` - -기본적으로 [`create_repo`]는 모델 리포지토리를 만듭니다. 하지만 `repo_type` 매개변수를 사용하여 다른 유형의 리포지토리를 지정할 수 있습니다. 예를 들어 데이터셋 리포지토리를 만들고 싶다면: - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-dataset", repo_type="dataset") -'https://huggingface.co/datasets/lysandre/test-dataset' -``` - -리포지토리를 만들 때, `private` 매개변수를 사용하여 가시성을 설정할 수 있습니다. - -```py ->>> from huggingface_hub import create_repo ->>> create_repo("lysandre/test-private", private=True) -``` - -추후 리포지토리 가시성을 변경하고 싶다면, [`update_repo_settings`] 함수를 이용해 바꿀 수 있습니다. - -### 리포지토리 삭제[[delete-a-repository]] - -[`delete_repo`]를 사용하여 리포지토리를 삭제할 수 있습니다. 리포지토리를 삭제하기 전에 신중히 결정하세요. 왜냐하면, 삭제하고 나서 다시 되돌릴 수 없는 프로세스이기 때문입니다! - -삭제하려는 리포지토리의 `repo_id`를 지정하세요: - -```py ->>> delete_repo(repo_id="lysandre/my-corrupted-dataset", repo_type="dataset") -``` - -### 리포지토리 복제(Spaces 전용)[[duplicate-a-repository-only-for-spaces]] - -가끔 다른 누군가의 리포지토리를 복사하여, 상황에 맞게 수정하고 싶을 때가 있습니다. 이는 [`duplicate_space`]를 사용하여 Space에 복사할 수 있습니다. 이 함수를 사용하면 리포지토리 전체를 복제할 수 있습니다. 그러나 여전히 하드웨어, 절전 시간, 리포지토리, 변수 및 비밀번호와 같은 자체 설정을 구성해야 합니다. 자세한 내용은 [Manage your Space](./manage-spaces) 문서를 참조하십시오. - -```py ->>> from huggingface_hub import duplicate_space ->>> duplicate_space("multimodalart/dreambooth-training", private=False) -RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...) -``` - -## 파일 다운로드와 업로드[[upload-and-download-files]] - -이제 리포지토리를 생성했으므로, 변경 사항을 푸시하고 파일을 다운로드하는 것에 관심이 있을 것입니다. - -이 두 가지 주제는 각각 자체 가이드가 필요합니다. 
리포지토리 사용하는 방법에 대해 알아보려면 [업로드](./upload) 및 [다운로드](./download) 문서를 참조하세요. - -## 브랜치와 태그[[branches-and-tags]] - -Git 리포지토리는 동일한 리포지토리의 다른 버전을 저장하기 위해 브랜치들을 사용합니다. 태그는 버전을 출시할 때와 같이 리포지토리의 특정 상태를 표시하는 데 사용될 수도 있습니다. 일반적으로 브랜치와 태그는 [git 참조](https://git-scm.com/book/en/v2/Git-Internals-Git-References) -로 참조됩니다. - -### 브랜치 생성과 태그[[create-branches-and-tags]] - -[`create_branch`]와 [`create_tag`]를 이용하여 새로운 브랜치와 태그를 생성할 수 있습니다. - -```py ->>> from huggingface_hub import create_branch, create_tag - -# `main` 브랜치를 기반으로 Space 저장소에 새 브랜치를 생성합니다. ->>> create_branch("Matthijs/speecht5-tts-demo", repo_type="space", branch="handle-dog-speaker") - -# `v0.1-release` 브랜치를 기반으로 Dataset 저장소에 태그를 생성합니다. ->>> create_tag("bigcode/the-stack", repo_type="dataset", revision="v0.1-release", tag="v0.1.1", tag_message="Bump release version.") -``` - -같은 방식으로 [`delete_branch`]와 [`delete_tag`] 함수를 사용하여 브랜치 또는 태그를 삭제할 수 있습니다. - -### 모든 브랜치와 태그 나열[[list-all-branches-and-tags]] - -[`list_repo_refs`]를 사용하여 리포지토리로부터 현재 존재하는 git 참조를 나열할 수 있습니다: - -```py ->>> from huggingface_hub import list_repo_refs ->>> list_repo_refs("bigcode/the-stack", repo_type="dataset") -GitRefs( - branches=[ - GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'), - GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714') - ], - converts=[], - tags=[ - GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da') - ] -) -``` - -## 리포지토리 설정 변경[[change-repository-settings]] - -리포지토리는 구성할 수 있는 몇 가지 설정이 있습니다. 대부분의 경우 브라우저의 리포지토리 설정 페이지에서 직접 설정할 것입니다. 설정을 바꾸려면 리포지토리에 대한 쓰기 액세스 권한이 있어야 합니다(사용자 리포지토리거나, 조직의 구성원이어야 함). 이 주제에서는 `huggingface_hub`를 사용하여 프로그래밍 방식으로 구성할 수 있는 설정을 알아보겠습니다. - -Spaces를 위한 특정 설정들(하드웨어, 환경변수 등)을 구성하기 위해서는 [Manage your Spaces](../guides/manage-spaces) 문서를 참조하세요. - -### 가시성 업데이트[[update-visibility]] - -리포지토리는 공개 또는 비공개로 설정할 수 있습니다. 비공개 리포지토리는 해당 저장소의 사용자 혹은 소속된 조직의 구성원만 볼 수 있습니다. 다음과 같이 리포지토리를 비공개로 변경할 수 있습니다. - -```py ->>> from huggingface_hub import update_repo_settings ->>> update_repo_settings(repo_id=repo_id, private=True) -``` - -### 리포지토리 이름 변경[[rename-your-repository]] - -[`move_repo`]를 사용하여 Hub에 있는 리포지토리 이름을 변경할 수 있습니다. 이 함수를 사용하여 개인에서 조직 리포지토리로 이동할 수도 있습니다. 이렇게 하면 [일부 제한 사항](https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo)이 있으므로 주의해야 합니다. 예를 들어, 다른 사용자에게 리포지토리를 이전할 수는 없습니다. - -```py ->>> from huggingface_hub import move_repo ->>> move_repo(from_id="Wauplin/cool-model", to_id="huggingface/cool-model") -``` - -## 리포지토리의 로컬 복사본 관리[[manage-a-local-copy-of-your-repository]] - -위에 설명한 모든 작업은 HTTP 요청을 사용하여 작업할 수 있습니다. 그러나 경우에 따라 로컬 복사본을 가지고 익숙한 Git 명령어를 사용하여 상호 작용하는 것이 편리할 수 있습니다. - -[`Repository`] 클래스는 Git 명령어와 유사한 기능을 제공하는 함수를 사용하여 Hub의 파일 및 리포지토리와 상호 작용할 수 있습니다. 이는 이미 알고 있고 좋아하는 Git 및 Git-LFS 방법을 사용하는 래퍼(wrapper)입니다. 시작하기 전에 Git-LFS가 설치되어 있는지 확인하세요([여기서](https://git-lfs.github.com/) 설치 지침을 확인할 수 있습니다). - -> [!WARNING] -> [`Repository`]는 [`HfApi`]에 구현된 HTTP 기반 대안을 선호하여 중단되었습니다. 아직 많은 레거시 코드에서 사용되고 있기 때문에 [`Repository`]가 완전히 제거되는 건 `v1.0` 릴리스에서만 이루어집니다. 자세한 내용은 [해당 설명 페이지](./concepts/git_vs_http)를 참조하세요. - -### 로컬 리포지토리 사용[[use-a-local-repository]] - -로컬 리포지토리 경로를 사용하여 [`Repository`] 객체를 생성하세요: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="//") -``` - -### 복제[[clone]] - -`clone_from` 매개변수는 Hugging Face 리포지토리 ID에서 로컬 디렉터리로 리포지토리를 복제합니다. 
이때 `local_dir` 매개변수를 사용하여 로컬 디렉터리에 저장합니다: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from`은 URL을 사용해 리포지토리를 복제할 수 있습니다. - -```py ->>> repo = Repository(local_dir="huggingface-hub", clone_from="https://huggingface.co/facebook/wav2vec2-large-960h-lv60") -``` - -`clone_from` 매개변수를 [`create_repo`]와 결합하여 리포지토리를 만들고 복제할 수 있습니다. - -```py ->>> repo_url = create_repo(repo_id="repo_name") ->>> repo = Repository(local_dir="repo_local_path", clone_from=repo_url) -``` - -리포지토리를 복제할 때 `git_user` 및 `git_email` 매개변수를 지정함으로써 복제한 리포지토리에 Git 사용자 이름과 이메일을 설정할 수 있습니다. 사용자가 해당 리포지토리에 커밋하면 Git은 커밋 작성자를 인식합니다. - -```py ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... token=True, -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -### 브랜치[[branch]] - -브랜치는 현재 코드와 파일에 영향을 미치지 않으면서 협업과 실험에 중요합니다.[`~Repository.git_checkout`]을 사용하여 브랜치 간에 전환할 수 있습니다. 예를 들어, `branch1`에서 `branch2`로 전환하려면: - -```py ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -### 끌어오기[[pull]] - -[`~Repository.git_pull`]은 원격 리포지토리로부터의 변경사항을 현재 로컬 브랜치에 업데이트하게 합니다. - -```py ->>> from huggingface_hub import Repository ->>> repo.git_pull() -``` - -브랜치가 원격에서의 새 커밋으로 업데이트 된 후에 로컬 커밋을 수행하고자 한다면 `rebase=True`를 설정하세요: - -```py ->>> repo.git_pull(rebase=True) -``` diff --git a/docs/source/ko/guides/upload.md b/docs/source/ko/guides/upload.md index a55d14c646..13de9b04f8 100644 --- a/docs/source/ko/guides/upload.md +++ b/docs/source/ko/guides/upload.md @@ -4,12 +4,7 @@ rendered properly in your Markdown viewer. # Hub에 파일 업로드하기[[upload-files-to-the-hub]] -파일과 작업물을 공유하는 것은 Hub의 주요 특성 중 하나입니다. `huggingface_hub`는 Hub에 파일을 업로드하기 위한 몇 가지 옵션을 제공합니다. 이러한 기능을 단독으로 사용하거나 라이브러리에 통합하여 해당 라이브러리의 사용자가 Hub와 더 편리하게 상호작용할 수 있도록 도울 수 있습니다. 이 가이드에서는 파일을 푸시하는 다양한 방법에 대해 설명합니다: - -- Git을 사용하지 않고 푸시하기. -- [Git LFS](https://git-lfs.github.com/)를 사용하여 매우 큰 파일을 푸시하기. -- `commit` 컨텍스트 매니저를 사용하여 푸시하기. -- [`~Repository.push_to_hub`] 함수를 사용하여 푸시하기. +파일과 작업물을 공유하는 것은 Hub의 주요 특성 중 하나입니다. `huggingface_hub`는 Hub에 파일을 업로드하기 위한 몇 가지 옵션을 제공합니다. 이러한 기능을 단독으로 사용하거나 라이브러리에 통합하여 해당 라이브러리의 사용자가 Hub와 더 편리하게 상호작용할 수 있도록 도울 수 있습니다. Hub에 파일을 업로드 하려면 허깅페이스 계정으로 로그인해야 합니다. 인증에 대한 자세한 내용은 [이 페이지](../quick-start#authentication)를 참조해 주세요. @@ -420,115 +415,3 @@ Hub에서 리포지토리를 구성하는 방법에 대한 모범 사례는 [리 > `hf_transfer`는 고급 사용자 도구입니다! > 테스트 및 프로덕션 준비가 완료되었지만, 고급 오류 처리나 프록시와 같은 사용자 친화적인 기능이 부족합니다. > 자세한 내용은 [이 섹션](https://huggingface.co/docs/huggingface_hub/hf_transfer)을 참조하세요. - -## (레거시) Git LFS로 파일 업로드하기[[legacy-upload-files-with-git-lfs]] - -위에서 설명한 모든 방법은 Hub의 API를 사용하여 파일을 업로드하며, 이는 Hub에 파일을 업로드하는 데 권장되는 방법입니다. -이뿐만 아니라 로컬 리포지토리를 관리하기 위하여 git 도구의 래퍼인 [`Repository`]또한 제공합니다. - -> [!WARNING] -> [`Repository`]는 공식적으로 지원 종료된 것은 아니지만, 가급적이면 위에서 설명한 HTTP 기반 방법들을 사용할 것을 권장합니다. -> 이 권장 사항에 대한 자세한 내용은 HTTP 기반 방식과 Git 기반 방식 간의 핵심적인 차이점을 설명하는 [이 가이드](../concepts/git_vs_http)를 참조하세요. - -Git LFS는 10MB보다 큰 파일을 자동으로 처리합니다. 하지만 매우 큰 파일(5GB 이상)의 경우, Git LFS용 사용자 지정 전송 에이전트를 설치해야 합니다: - -```bash -hf lfs-enable-largefiles -``` - -매우 큰 파일이 있는 각 리포지토리에 대해 이 옵션을 설치해야 합니다. -설치가 완료되면 5GB보다 큰 파일을 푸시할 수 있습니다. - -### 커밋 컨텍스트 관리자[[commit-context-manager]] - -`commit` 컨텍스트 관리자는 가장 일반적인 네 가지 Git 명령인 pull, add, commit, push를 처리합니다. -`git-lfs`는 10MB보다 큰 파일을 자동으로 추적합니다. -다음 예제에서는 `commit` 컨텍스트 관리자가 다음과 같은 작업을 수행합니다: - -1. `text-files` 리포지토리에서 pull. -2. 
`file.txt`에 변경 내용을 add. -3. 변경 내용을 commit. -4. 변경 내용을 `text-files` 리포지토리에 push. - -```python ->>> from huggingface_hub import Repository ->>> with Repository(local_dir="text-files", clone_from="/text-files").commit(commit_message="My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -다음은 `commit` 컨텍스트 관리자를 사용하여 파일을 저장하고 리포지토리에 업로드하는 방법의 또 다른 예입니다: - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model", token=True).commit(commit_message="My cool model :)"): -... torch.save(model.state_dict(), "model.pt") -``` - -커밋을 비동기적으로 푸시하려면 `blocking=False`를 설정하세요. -커밋을 푸시하는 동안 스크립트를 계속 실행하고 싶을 때 논 블로킹 동작이 유용합니다. - -```python ->>> with repo.commit(commit_message="My cool model :)", blocking=False) -``` - -`command_queue` 메서드로 푸시 상태를 확인할 수 있습니다: - -```python ->>> last_command = repo.command_queue[-1] ->>> last_command.status -``` - -가능한 상태는 아래 표를 참조하세요: - -| 상태 | 설명 | -| -------- | ----------------------------- | -| -1 | 푸시가 진행 중입니다. | -| 0 | 푸시가 성공적으로 완료되었습니다.| -| Non-zero | 오류가 발생했습니다. | - -`blocking=False`인 경우, 명령이 추적되며 스크립트에서 다른 오류가 발생하더라도 모든 푸시가 완료된 경우에만 스크립트가 종료됩니다. -푸시 상태를 확인하는 데 유용한 몇 가지 추가 명령은 다음과 같습니다: - -```python -# 오류를 검사합니다. ->>> last_command.stderr - -# 푸시 진행여부를 확인합니다. ->>> last_command.is_done - -# 푸시 명령의 에러여부를 파악합니다. ->>> last_command.failed -``` - -### push_to_hub[[pushtohub]] - -[`Repository`] 클래스에는 파일을 추가하고 커밋한 후 리포지토리로 푸시하는 [`~Repository.push_to_hub`] 함수가 있습니다. [`~Repository.push_to_hub`]는 `commit` 컨텍스트 관리자와는 달리 호출하기 전에 먼저 리포지토리에서 업데이트(pull) 작업을 수행 해야 합니다. - -예를 들어 Hub에서 리포지토리를 이미 복제했다면 로컬 디렉터리에서 `repo`를 초기화할 수 있습니다: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository(local_dir="path/to/local/repo") -``` - -로컬 클론을 [`~Repository.git_pull`]로 업데이트한 다음 파일을 Hub로 푸시합니다: - -```py ->>> repo.git_pull() ->>> repo.push_to_hub(commit_message="Commit my-awesome-file to the Hub") -``` - -그러나 아직 파일을 푸시할 준비가 되지 않았다면 [`~Repository.git_add`] 와 [`~Repository.git_commit`]을 사용하여 파일만 추가하고 커밋할 수 있습니다: - -```py ->>> repo.git_add("path/to/file") ->>> repo.git_commit(commit_message="add my first model config file :)") -``` - -준비가 완료되면 [`~Repository.git_push`]를 사용하여 파일을 리포지토리에 푸시합니다: - -```py ->>> repo.git_push() -``` diff --git a/docs/source/ko/installation.md b/docs/source/ko/installation.md index 720346b1a1..d9cd8a46dd 100644 --- a/docs/source/ko/installation.md +++ b/docs/source/ko/installation.md @@ -6,7 +6,7 @@ rendered properly in your Markdown viewer. 시작하기 전에 적절한 패키지를 설치하여 환경을 설정해야 합니다. -`huggingface_hub`는 **Python 3.8+**에서 테스트되었습니다. +`huggingface_hub`는 **Python 3.9+**에서 테스트되었습니다. ## pip로 설치하기 [[install-with-pip]] @@ -46,17 +46,13 @@ pip install --upgrade huggingface_hub 선택적 의존성은 `pip`을 통해 설치할 수 있습니다: ```bash -# TensorFlow 관련 기능에 대한 의존성을 설치합니다. -# /!\ 경고: `pip install tensorflow`와 동일하지 않습니다. -pip install 'huggingface_hub[tensorflow]' - # PyTorch와 CLI와 관련된 기능에 대한 의존성을 모두 설치합니다. pip install 'huggingface_hub[cli,torch]' ``` 다음은 `huggingface_hub`의 선택 의존성 목록입니다: - `cli`: 보다 편리한 `huggingface_hub`의 CLI 인터페이스입니다. -- `fastai`, `torch`, `tensorflow`: 프레임워크별 기능을 실행하려면 필요합니다. +- `fastai`, `torch`: 프레임워크별 기능을 실행하려면 필요합니다. - `dev`: 라이브러리에 기여하고 싶다면 필요합니다. 테스트 실행을 위한 `testing`, 타입 검사기 실행을 위한 `typing`, 린터 실행을 위한 `quality`가 포함됩니다. 
### 소스에서 설치 [[install-from-source]] diff --git a/docs/source/ko/package_reference/inference_client.md b/docs/source/ko/package_reference/inference_client.md index 686c9282a9..0930a75351 100644 --- a/docs/source/ko/package_reference/inference_client.md +++ b/docs/source/ko/package_reference/inference_client.md @@ -35,13 +35,3 @@ pip install --upgrade huggingface_hub[inference] ## 반환 유형[[return-types]] 대부분의 작업에 대해, 반환 값은 내장된 유형(string, list, image...)을 갖습니다. 보다 복잡한 유형을 위한 목록은 다음과 같습니다. - - -## 추론 API[[huggingface_hub.InferenceApi]] - -[`InferenceAPI`]는 추론 API를 호출하는 레거시 방식입니다. 이 인터페이스는 더 간단하며 각 작업의 입력 매개변수와 출력 형식을 알아야 합니다. 또한 추론 엔드포인트나 AWS SageMaker와 같은 다른 서비스에 연결할 수 있는 기능이 없습니다. [`InferenceAPI`]는 곧 폐지될 예정이므로 가능한 경우 [`InferenceClient`]를 사용하는 것을 권장합니다. 스크립트에서 [`InferenceAPI`]를 [`InferenceClient`]로 전환하는 방법에 대해 알아보려면 [이 가이드](../guides/inference#legacy-inferenceapi-client)를 참조하세요. - -[[autodoc]] InferenceApi - - __init__ - - __call__ - - all diff --git a/docs/source/ko/package_reference/mixins.md b/docs/source/ko/package_reference/mixins.md index 4a4a84ad9e..a5f8162eff 100644 --- a/docs/source/ko/package_reference/mixins.md +++ b/docs/source/ko/package_reference/mixins.md @@ -20,16 +20,6 @@ ML 프레임워크를 Hub와 통합하는 방법은 [통합 가이드](../guides [[autodoc]] PyTorchModelHubMixin -### Keras[[huggingface_hub.KerasModelHubMixin]] - -[[autodoc]] KerasModelHubMixin - -[[autodoc]] from_pretrained_keras - -[[autodoc]] push_to_hub_keras - -[[autodoc]] save_pretrained_keras - ### Fastai[[huggingface_hub.from_pretrained_fastai]] [[autodoc]] from_pretrained_fastai diff --git a/docs/source/ko/package_reference/repository.md b/docs/source/ko/package_reference/repository.md deleted file mode 100644 index fc70e3e203..0000000000 --- a/docs/source/ko/package_reference/repository.md +++ /dev/null @@ -1,49 +0,0 @@ - - -# 로컬 및 온라인 리포지토리 관리[[managing-local-and-online-repositories]] - -`Repository` 클래스는 `git` 및 `git-lfs` 명령을 감싸는 도우미 클래스로, 매우 큰 리포지토리를 관리하는 데 적합한 툴링을 제공합니다. - -`git` 작업이 포함되거나 리포지토리에서의 협업이 중점이 될 때 권장되는 도구입니다. - -## 리포지토리 클래스[[the-repository-class]] - -[[autodoc]] Repository - - __init__ - - current_branch - - all - -## 도우미 메소드[[helper-methods]] - -[[autodoc]] huggingface_hub.repository.is_git_repo - -[[autodoc]] huggingface_hub.repository.is_local_clone - -[[autodoc]] huggingface_hub.repository.is_tracked_with_lfs - -[[autodoc]] huggingface_hub.repository.is_git_ignored - -[[autodoc]] huggingface_hub.repository.files_to_be_staged - -[[autodoc]] huggingface_hub.repository.is_tracked_upstream - -[[autodoc]] huggingface_hub.repository.commits_to_push - -## 후속 비동기 명령[[following-asynchronous-commands]] - -`Repository` 유틸리티는 비동기적으로 시작할 수 있는 여러 메소드를 제공합니다. -- `git_push` -- `git_pull` -- `push_to_hub` -- `commit` 컨텍스트 관리자 - -이러한 비동기 메소드를 관리하는 유틸리티는 아래를 참조하세요. - -[[autodoc]] Repository - - commands_failed - - commands_in_progress - - wait_for_commands - -[[autodoc]] huggingface_hub.repository.CommandInProgress diff --git a/docs/source/ko/package_reference/serialization.md b/docs/source/ko/package_reference/serialization.md index 25901237bf..9dd7a6ce7b 100644 --- a/docs/source/ko/package_reference/serialization.md +++ b/docs/source/ko/package_reference/serialization.md @@ -8,11 +8,7 @@ rendered properly in your Markdown viewer. ## 상태 사전을 샤드로 나누기[[split-state-dict-into-shards]] -현재 이 모듈은 상태 딕셔너리(예: 레이어 이름과 관련 텐서 간의 매핑)를 받아 여러 샤드로 나누고, 이 과정에서 적절한 인덱스를 생성하는 단일 헬퍼를 포함하고 있습니다. 이 헬퍼는 `torch`, `tensorflow`, `numpy` 텐서에 사용 가능하며, 다른 ML 프레임워크로 쉽게 확장될 수 있도록 설계되었습니다. 
- -### split_tf_state_dict_into_shards[[huggingface_hub.split_tf_state_dict_into_shards]] - -[[autodoc]] huggingface_hub.split_tf_state_dict_into_shards +현재 이 모듈은 상태 딕셔너리(예: 레이어 이름과 관련 텐서 간의 매핑)를 받아 여러 샤드로 나누고, 이 과정에서 적절한 인덱스를 생성하는 단일 헬퍼를 포함하고 있습니다. 이 헬퍼는 `torch` 텐서에 사용 가능하며, 다른 ML 프레임워크로 쉽게 확장될 수 있도록 설계되었습니다. ### split_torch_state_dict_into_shards[[huggingface_hub.split_torch_state_dict_into_shards]] diff --git a/docs/source/ko/package_reference/utilities.md b/docs/source/ko/package_reference/utilities.md index a76e9d474b..4390a90718 100644 --- a/docs/source/ko/package_reference/utilities.md +++ b/docs/source/ko/package_reference/utilities.md @@ -84,16 +84,6 @@ True [[autodoc]] huggingface_hub.utils.enable_progress_bars -## HTTP 백엔드 구성[[huggingface_hub.configure_http_backend]] - -일부 환경에서는 HTTP 호출이 이루어지는 방식을 구성할 수 있습니다. 예를 들어, 프록시를 사용하는 경우가 그렇습니다. `huggingface_hub`는 [`configure_http_backend`]를 사용하여 전역적으로 이를 구성할 수 있게 합니다. 그러면 Hub로의 모든 요청이 사용자가 설정한 설정을 사용합니다. 내부적으로 `huggingface_hub`는 `requests.Session`을 사용하므로 사용 가능한 매개변수에 대해 자세히 알아보려면 [requests 문서](https://requests.readthedocs.io/en/latest/user/advanced)를 참조하는 것이 좋습니다. - -`requests.Session`이 스레드 안전을 보장하지 않기 때문에 `huggingface_hub`는 스레드당 하나의 세션 인스턴스를 생성합니다. 세션을 사용하면 HTTP 호출 사이에 연결을 유지하고 최종적으로 시간을 절약할 수 있습니다. `huggingface_hub`를 서드 파티 라이브러리에 통합하고 사용자 지정 호출을 Hub로 만들려는 경우, [`get_session`]을 사용하여 사용자가 구성한 세션을 가져옵니다 (즉, 모든 `requests.get(...)` 호출을 `get_session().get(...)`으로 대체합니다). - -[[autodoc]] configure_http_backend - -[[autodoc]] get_session - ## HTTP 오류 다루기[[handle-http-errors]] @@ -125,39 +115,43 @@ except HfHubHTTPError as e: 여기에는 `huggingface_hub`에서 발생하는 HTTP 오류 목록이 있습니다. -#### HfHubHTTPError[[huggingface_hub.utils.HfHubHTTPError]] +#### HfHubHTTPError[[huggingface_hub.errors.HfHubHTTPError]] `HfHubHTTPError`는 HF Hub HTTP 오류에 대한 부모 클래스입니다. 이 클래스는 서버 응답을 구문 분석하고 오류 메시지를 형식화하여 사용자에게 가능한 많은 정보를 제공합니다. 
-[[autodoc]] huggingface_hub.utils.HfHubHTTPError +[[autodoc]] huggingface_hub.errors.HfHubHTTPError + +#### RepositoryNotFoundError[[huggingface_hub.errors.RepositoryNotFoundError]] -#### RepositoryNotFoundError[[huggingface_hub.utils.RepositoryNotFoundError]] +[[autodoc]] huggingface_hub.errors.RepositoryNotFoundError -[[autodoc]] huggingface_hub.utils.RepositoryNotFoundError +#### GatedRepoError[[huggingface_hub.errors.GatedRepoError]] -#### GatedRepoError[[huggingface_hub.utils.GatedRepoError]] +[[autodoc]] huggingface_hub.errors.GatedRepoError -[[autodoc]] huggingface_hub.utils.GatedRepoError +#### RevisionNotFoundError[[huggingface_hub.errors.RevisionNotFoundError]] -#### RevisionNotFoundError[[huggingface_hub.utils.RevisionNotFoundError]] +[[autodoc]] huggingface_hub.errors.RevisionNotFoundError -[[autodoc]] huggingface_hub.utils.RevisionNotFoundError +#### BadRequestError[[huggingface_hub.errors.BadRequestError]] -#### EntryNotFoundError[[huggingface_hub.utils.EntryNotFoundError]] +[[autodoc]] huggingface_hub.errors.BadRequestError -[[autodoc]] huggingface_hub.utils.EntryNotFoundError +#### EntryNotFoundError[[huggingface_hub.errors.EntryNotFoundError]] -#### BadRequestError[[huggingface_hub.utils.BadRequestError]] +[[autodoc]] huggingface_hub.errors.EntryNotFoundError -[[autodoc]] huggingface_hub.utils.BadRequestError +#### RemoteEntryNotFoundError[[huggingface_hub.errors.RemoteEntryNotFoundError]] -#### LocalEntryNotFoundError[[huggingface_hub.utils.LocalEntryNotFoundError]] +[[autodoc]] huggingface_hub.errors.RemoteEntryNotFoundError -[[autodoc]] huggingface_hub.utils.LocalEntryNotFoundError +#### LocalEntryNotFoundError[[huggingface_hub.errors.LocalEntryNotFoundError]] -#### OfflineModeIsEnabledd[[huggingface_hub.utils.OfflineModeIsEnabled]] +[[autodoc]] huggingface_hub.errors.LocalEntryNotFoundError -[[autodoc]] huggingface_hub.utils.OfflineModeIsEnabled +#### OfflineModeIsEnabled[[huggingface_hub.errors.OfflineModeIsEnabled]] + +[[autodoc]] huggingface_hub.errors.OfflineModeIsEnabled ## 원격 측정[[huggingface_hub.utils.send_telemetry]] @@ -195,20 +189,6 @@ huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in rep >>> my_cool_method(repo_id="other..repo..id") huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'. - ->>> @validate_hf_hub_args -... def my_cool_auth_method(token: str): -... print(token) - ->>> my_cool_auth_method(token="a token") -"a token" - ->>> my_cool_auth_method(use_auth_token="a use_auth_token") -"a use_auth_token" - ->>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token") -UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token` value will be ignored. -"a token" ``` #### validate_hf_hub_args[[huggingface_hub.utils.validate_hf_hub_args]] @@ -226,9 +206,3 @@ UserWarning: Both `token` and `use_auth_token` are passed (...). `use_auth_token #### repo_id[[huggingface_hub.utils.validate_repo_id]] [[autodoc]] utils.validate_repo_id - -#### smoothly_deprecate_use_auth_token[[huggingface_hub.utils.smoothly_deprecate_use_auth_token]] - -정확히 검증기는 아니지만, 잘 실행됩니다. - -[[autodoc]] utils.smoothly_deprecate_use_auth_token diff --git a/docs/source/tm/installation.md b/docs/source/tm/installation.md index f16ac74667..479b2c3e4c 100644 --- a/docs/source/tm/installation.md +++ b/docs/source/tm/installation.md @@ -2,7 +2,7 @@ நீங்கள் தொடங்குவதற்கு முன், தகுந்த தொகுப்புகளை நிறுவுவதன் மூலம் உங்கள் சூழலை அமைக்க வேண்டும்.
-`huggingface_hub` **Python 3.8+** மின்பொருள்களில் சோதிக்கப்பட்டுள்ளது. +`huggingface_hub` **Python 3.9+** மின்பொருள்களில் சோதிக்கப்பட்டுள்ளது. ### பிப் மூலம் நிறுவு @@ -43,17 +43,13 @@ pip install --upgrade huggingface_hub நீங்கள் விருப்பத் தேவைப்படும் சார்புகளை `pip` மூலம் நிறுவலாம்: ```bash -# டென்சர்‌ஃபிளோவுக்கான குறிப்பிட்ட அம்சங்களுக்கு சார்ந்த பொறுப்பு நிறுவவும் -# /!\ எச்சரிக்கை: இது `pip install tensorflow` க்கு சமமாகக் கருதப்படாது -pip install 'huggingface_hub[tensorflow]' - # டார்ச்-குறிப்பிட்ட மற்றும் CLI-குறிப்பிட்ட அம்சங்களுக்கு தேவையான பொறுப்புகளை நிறுவவும். pip install 'huggingface_hub[cli,torch]' ``` `huggingface_hub`-இல் உள்ள விருப்பத் தேவைப்படும் சார்புகளின் பட்டியல்: - `cli`: `huggingface_hub`-க்கு மிகவும் வசதியான CLI இடைமுகத்தை வழங்குகிறது. -- `fastai`, `torch`, `tensorflow`: வடிவமைப்பு குறிப்பிட்ட அம்சங்களை இயக்க தேவையான சார்புகள். +- `fastai`, `torch`: வடிவமைப்பு குறிப்பிட்ட அம்சங்களை இயக்க தேவையான சார்புகள். - `dev`: நூலகத்திற்கு பங்களிக்க தேவையான சார்புகள். இதில் சோதனை (சோதனைகளை இயக்க), வகை சோதனை (வகை சரிபார்ப்பு ஐ இயக்க) மற்றும் தரம் (லிண்டர்கள் ஐ இயக்க) உள்ளன. ### மூலத்திலிருந்து நிறுவல் diff --git a/setup.py b/setup.py index 028c67be08..9862deb896 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import sys + from setuptools import find_packages, setup @@ -17,8 +19,9 @@ def get_version() -> str: "hf-xet>=1.1.3,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'", "packaging>=20.9", "pyyaml>=5.1", - "requests", + "httpx>=0.23.0, <1", "tqdm>=4.42.1", + "typer-slim", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias ] @@ -26,6 +29,7 @@ def get_version() -> str: extras["cli"] = [ "InquirerPy==0.3.4", # Note: installs `prompt-toolkit` in the background + "shellingham", ] extras["inference"] = [ @@ -52,17 +56,6 @@ def get_version() -> str: "fastcore>=1.3.27", ] -extras["tensorflow"] = [ - "tensorflow", - "pydot", - "graphviz", -] - -extras["tensorflow-testing"] = [ - "tensorflow", - "keras<3.0", -] - extras["hf_xet"] = ["hf-xet>=1.1.2,<2.0.0"] extras["mcp"] = [ @@ -77,7 +70,7 @@ def get_version() -> str: + [ "jedi", "Jinja2", - "pytest>=8.1.1,<8.2.2", # at least until 8.2.3 is released with https://github.com/pytest-dev/pytest/pull/12436 + "pytest>=8.4.2", # we need https://github.com/pytest-dev/pytest/pull/12436 "pytest-cov", "pytest-env", "pytest-xdist", @@ -88,18 +81,23 @@ def get_version() -> str: "urllib3<2.0", # VCR.py broken with urllib3 2.0 (see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html) "soundfile", "Pillow", - "gradio>=4.0.0", # to test webhooks # pin to avoid issue on Python3.12 + "requests", # for gradio "numpy", # for embeddings "fastapi", # To build the documentation ] ) +if sys.version_info >= (3, 10): + # We need gradio to test webhooks server + # But gradio 5.0+ only supports python 3.10+ so we don't want to test earlier versions + extras["testing"].append("gradio>=5.0.0") + extras["testing"].append("requests") # see https://github.com/gradio-app/gradio/pull/11830 + # Typing extra dependencies list is duplicated in `.pre-commit-config.yaml` # Please make sure to update the list there when adding a new typing dependency. 
extras["typing"] = [ "typing-extensions>=4.8.0", "types-PyYAML", - "types-requests", "types-simplejson", "types-toml", "types-tqdm", @@ -108,8 +106,7 @@ def get_version() -> str: extras["quality"] = [ "ruff>=0.9.0", - "mypy>=1.14.1,<1.15.0; python_version=='3.8'", - "mypy==1.15.0; python_version>='3.9'", + "mypy==1.15.0", "libcst>=1.4.0", "ty", ] @@ -134,13 +131,12 @@ def get_version() -> str: extras_require=extras, entry_points={ "console_scripts": [ - "huggingface-cli=huggingface_hub.commands.huggingface_cli:main", "hf=huggingface_hub.cli.hf:main", "tiny-agents=huggingface_hub.inference._mcp.cli:app", ], "fsspec.specs": "hf=huggingface_hub.HfFileSystem", }, - python_requires=">=3.8.0", + python_requires=">=3.9.0", install_requires=install_requires, classifiers=[ "Intended Audience :: Developers", @@ -150,7 +146,6 @@ def get_version() -> str: "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/src/huggingface_hub/README.md b/src/huggingface_hub/README.md index cd5c1e2beb..b0e5cd65d9 100644 --- a/src/huggingface_hub/README.md +++ b/src/huggingface_hub/README.md @@ -112,242 +112,3 @@ With the `HfApi` class there are methods to query models, datasets, and Spaces b - `space_info()` These lightly wrap around the API Endpoints. Documentation for valid parameters and descriptions can be found [here](https://huggingface.co/docs/hub/endpoints). - - -### Advanced programmatic repository management - -The `Repository` class helps manage both offline Git repositories and Hugging -Face Hub repositories. Using the `Repository` class requires `git` and `git-lfs` -to be installed. - -Instantiate a `Repository` object by calling it with a path to a local Git -clone/repository: - -```python ->>> from huggingface_hub import Repository ->>> repo = Repository("//") -``` - -The `Repository` takes a `clone_from` string as parameter. This can stay as -`None` for offline management, but can also be set to any URL pointing to a Git -repo to clone that repository in the specified directory: - -```python ->>> repo = Repository("huggingface-hub", clone_from="https://github.com/huggingface/huggingface_hub") -``` - -The `clone_from` method can also take any Hugging Face model ID as input, and -will clone that repository: - -```python ->>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -If the repository you're cloning is one of yours or one of your organisation's, then having the ability to commit and push to that repository is important. 
In order to do that, you should make sure to be logged-in using `hf auth login`,: - -```python ->>> repo = Repository("my-model", clone_from="/") -``` - -This works for models, datasets and spaces repositories; but you will need to -explicitely specify the type for the last two options: - -```python ->>> repo = Repository("my-dataset", clone_from="/", repo_type="dataset") -``` - -You can also change between branches: - -```python ->>> repo = Repository("huggingface-hub", clone_from="/", revision='branch1') ->>> repo.git_checkout("branch2") -``` - -The `clone_from` method can also take any Hugging Face model ID as input, and -will clone that repository: - -```python ->>> repo = Repository("w2v2", clone_from="facebook/wav2vec2-large-960h-lv60") -``` - -Finally, you can choose to specify the Git username and email attributed to that -clone directly by using the `git_user` and `git_email` parameters. When -committing to that repository, Git will therefore be aware of who you are and -who will be the author of the commits: - -```python ->>> repo = Repository( -... "my-dataset", -... clone_from="/", -... repo_type="dataset", -... git_user="MyName", -... git_email="me@cool.mail" -... ) -``` - -The repository can be managed through this object, through wrappers of -traditional Git methods: - -- `git_add(pattern: str, auto_lfs_track: bool)`. The `auto_lfs_track` flag - triggers auto tracking of large files (>10MB) with `git-lfs` -- `git_commit(commit_message: str)` -- `git_pull(rebase: bool)` -- `git_push()` -- `git_checkout(branch)` - -The `git_push` method has a parameter `blocking` which is `True` by default. When set to `False`, the push will -happen behind the scenes - which can be helpful if you would like your script to continue on while the push is -happening. - -LFS-tracking methods: - -- `lfs_track(pattern: Union[str, List[str]], filename: bool)`. Setting - `filename` to `True` will use the `--filename` parameter, which will consider - the pattern(s) as filenames, even if they contain special glob characters. -- `lfs_untrack()`. -- `auto_track_large_files()`: automatically tracks files that are larger than - 10MB. Make sure to call this after adding files to the index. - -On top of these unitary methods lie some useful additional methods: - -- `push_to_hub(commit_message)`: consecutively does `git_add`, `git_commit` and - `git_push`. -- `commit(commit_message: str, track_large_files: bool)`: this is a context - manager utility that handles committing to a repository. This automatically - tracks large files (>10Mb) with `git-lfs`. The `track_large_files` argument can - be set to `False` if you wish to ignore that behavior. - -These two methods also have support for the `blocking` parameter. - -Examples using the `commit` context manager: -```python ->>> with Repository("text-files", clone_from="/text-files").commit("My first file :)"): -... with open("file.txt", "w+") as f: -... f.write(json.dumps({"hey": 8})) -``` - -```python ->>> import torch ->>> model = torch.nn.Transformer() ->>> with Repository("torch-model", clone_from="/torch-model").commit("My cool model :)"): -... torch.save(model.state_dict(), "model.pt") - ``` - -### Non-blocking behavior - -The pushing methods have access to a `blocking` boolean parameter to indicate whether the push should happen -asynchronously. - -In order to see if the push has finished or its status code (to spot a failure), one should use the `command_queue` -property on the `Repository` object. 
- -For example: - -```python -from huggingface_hub import Repository - -repo = Repository("", clone_from="/") - -with repo.commit("Commit message", blocking=False): - # Save data - -last_command = repo.command_queue[-1] - -# Status of the push command -last_command.status -# Will return the status code -# -> -1 will indicate the push is still ongoing -# -> 0 will indicate the push has completed successfully -# -> non-zero code indicates the error code if there was an error - -# if there was an error, the stderr may be inspected -last_command.stderr - -# Whether the command finished or if it is still ongoing -last_command.is_done - -# Whether the command errored-out. -last_command.failed -``` - -When using `blocking=False`, the commands will be tracked and your script will exit only when all pushes are done, even -if other errors happen in your script (a failed push counts as done). - - -### Need to upload very large (>5GB) files? - -To upload large files (>5GB 🔥) from git command-line, you need to install the custom transfer agent -for git-lfs, bundled in this package. - -To install, just run: - -```bash -$ hf lfs-enable-largefiles . -``` - -This should be executed once for each model repo that contains a model file ->5GB. If you just try to push a file bigger than 5GB without running that -command, you will get an error with a message reminding you to run it. - -Finally, there's a `hf lfs-multipart-upload` command but that one -is internal (called by lfs directly) and is not meant to be called by the user. - -
- -## Using the Inference API wrapper - -`huggingface_hub` comes with a wrapper client to make calls to the Inference -API! You can find some examples below, but we encourage you to visit the -Inference API -[documentation](https://api-inference.huggingface.co/docs/python/html/detailed_parameters.html) -to review the specific parameters for the different tasks. - -When you instantiate the wrapper to the Inference API, you specify the model -repository id. The pipeline (`text-classification`, `text-to-speech`, etc) is -automatically extracted from the -[repository](https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined), -but you can also override it as shown below. - - -### Examples - -Here is a basic example of calling the Inference API for a `fill-mask` task -using the `bert-base-uncased` model. The `fill-mask` task only expects a string -(or list of strings) as input. - -```python -from huggingface_hub.inference_api import InferenceApi -inference = InferenceApi("bert-base-uncased", token=API_TOKEN) -inference(inputs="The goal of life is [MASK].") ->> [{'sequence': 'the goal of life is life.', 'score': 0.10933292657136917, 'token': 2166, 'token_str': 'life'}] -``` - -This is an example of a task (`question-answering`) which requires a dictionary -as input thas has the `question` and `context` keys. - -```python -inference = InferenceApi("deepset/roberta-base-squad2", token=API_TOKEN) -inputs = {"question":"What's my name?", "context":"My name is Clara and I live in Berkeley."} -inference(inputs) ->> {'score': 0.9326569437980652, 'start': 11, 'end': 16, 'answer': 'Clara'} -``` - -Some tasks might also require additional params in the request. Here is an -example using a `zero-shot-classification` model. - -```python -inference = InferenceApi("typeform/distilbert-base-uncased-mnli", token=API_TOKEN) -inputs = "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!" -params = {"candidate_labels":["refund", "legal", "faq"]} -inference(inputs, params) ->> {'sequence': 'Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!', 'labels': ['refund', 'faq', 'legal'], 'scores': [0.9378499388694763, 0.04914155602455139, 0.013008488342165947]} -``` - -Finally, there are some models that might support multiple tasks. For example, -`sentence-transformers` models can do `sentence-similarity` and -`feature-extraction`. You can override the configured task when initializing the -API. 
- -```python -inference = InferenceApi("bert-base-uncased", task="feature-extraction", token=API_TOKEN) -``` diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 2f1c10a873..2dde730333 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -46,7 +46,7 @@ from typing import TYPE_CHECKING -__version__ = "0.36.0.dev0" +__version__ = "1.0.0.rc4" # Alphabetical order of definitions is ensured in tests # WARNING: any comment added in this dictionary definition will be lost when @@ -138,6 +138,7 @@ "push_to_hub_fastai", ], "file_download": [ + "DryRunFileInfo", "HfFileMetadata", "_CACHED_NO_EXIST", "get_hf_file_metadata", @@ -217,7 +218,6 @@ "get_safetensors_metadata", "get_space_runtime", "get_space_variables", - "get_token_permission", "get_user_overview", "get_webhook", "grant_access", @@ -278,7 +278,6 @@ "update_collection_metadata", "update_inference_endpoint", "update_repo_settings", - "update_repo_visibility", "update_webhook", "upload_file", "upload_folder", @@ -471,15 +470,6 @@ "inference._mcp.mcp_client": [ "MCPClient", ], - "inference_api": [ - "InferenceApi", - ], - "keras_mixin": [ - "KerasModelHubMixin", - "from_pretrained_keras", - "push_to_hub_keras", - "save_pretrained_keras", - ], "repocard": [ "DatasetCard", "ModelCard", @@ -497,12 +487,8 @@ "ModelCardData", "SpaceCardData", ], - "repository": [ - "Repository", - ], "serialization": [ "StateDictSplit", - "get_tf_storage_size", "get_torch_storage_id", "get_torch_storage_size", "load_state_dict_from_file", @@ -510,7 +496,6 @@ "save_torch_model", "save_torch_state_dict", "split_state_dict_into_shards_factory", - "split_tf_state_dict_into_shards", "split_torch_state_dict_into_shards", ], "serialization._dduf": [ @@ -520,6 +505,8 @@ "read_dduf_file", ], "utils": [ + "ASYNC_CLIENT_FACTORY_T", + "CLIENT_FACTORY_T", "CacheNotFound", "CachedFileInfo", "CachedRepoInfo", @@ -527,14 +514,17 @@ "CorruptedCacheException", "DeleteCacheStrategy", "HFCacheInfo", - "HfFolder", "cached_assets_path", - "configure_http_backend", + "close_session", "dump_environment_info", + "get_async_session", "get_session", "get_token", + "hf_raise_for_status", "logging", "scan_cache_dir", + "set_async_client_factory", + "set_client_factory", ], } @@ -550,6 +540,7 @@ # ``` __all__ = [ + "ASYNC_CLIENT_FACTORY_T", "Agent", "AsyncInferenceClient", "AudioClassificationInput", @@ -564,6 +555,7 @@ "AutomaticSpeechRecognitionOutput", "AutomaticSpeechRecognitionOutputChunk", "AutomaticSpeechRecognitionParameters", + "CLIENT_FACTORY_T", "CONFIG_NAME", "CacheNotFound", "CachedFileInfo", @@ -632,6 +624,7 @@ "DocumentQuestionAnsweringInputData", "DocumentQuestionAnsweringOutputElement", "DocumentQuestionAnsweringParameters", + "DryRunFileInfo", "EvalResult", "FLAX_WEIGHTS_NAME", "FeatureExtractionInput", @@ -652,7 +645,6 @@ "HfFileSystemFile", "HfFileSystemResolvedPath", "HfFileSystemStreamFile", - "HfFolder", "ImageClassificationInput", "ImageClassificationOutputElement", "ImageClassificationOutputTransform", @@ -674,7 +666,6 @@ "ImageToVideoOutput", "ImageToVideoParameters", "ImageToVideoTargetSize", - "InferenceApi", "InferenceClient", "InferenceEndpoint", "InferenceEndpointError", @@ -686,7 +677,6 @@ "JobOwner", "JobStage", "JobStatus", - "KerasModelHubMixin", "MCPClient", "ModelCard", "ModelCardData", @@ -711,7 +701,6 @@ "REPO_TYPE_SPACE", "RepoCard", "RepoUrl", - "Repository", "SentenceSimilarityInput", "SentenceSimilarityInputData", "SpaceCard", @@ -824,8 +813,8 @@ "cancel_access_request", "cancel_job", 
"change_discussion_status", + "close_session", "comment_discussion", - "configure_http_backend", "create_branch", "create_collection", "create_commit", @@ -862,7 +851,7 @@ "fetch_job_logs", "file_exists", "from_pretrained_fastai", - "from_pretrained_keras", + "get_async_session", "get_collection", "get_dataset_tags", "get_discussion_details", @@ -876,9 +865,7 @@ "get_session", "get_space_runtime", "get_space_variables", - "get_tf_storage_size", "get_token", - "get_token_permission", "get_torch_storage_id", "get_torch_storage_size", "get_user_overview", @@ -886,6 +873,7 @@ "grant_access", "hf_hub_download", "hf_hub_url", + "hf_raise_for_status", "inspect_job", "inspect_scheduled_job", "interpreter_login", @@ -932,7 +920,6 @@ "permanently_delete_lfs_files", "preupload_lfs_files", "push_to_hub_fastai", - "push_to_hub_keras", "read_dduf_file", "reject_access_request", "rename_discussion", @@ -948,16 +935,16 @@ "run_as_future", "run_job", "run_uv_job", - "save_pretrained_keras", "save_torch_model", "save_torch_state_dict", "scale_to_zero_inference_endpoint", "scan_cache_dir", + "set_async_client_factory", + "set_client_factory", "set_space_sleep_time", "snapshot_download", "space_info", "split_state_dict_into_shards_factory", - "split_tf_state_dict_into_shards", "split_torch_state_dict_into_shards", "super_squash_history", "suspend_scheduled_job", @@ -967,7 +954,6 @@ "update_collection_metadata", "update_inference_endpoint", "update_repo_settings", - "update_repo_visibility", "update_webhook", "upload_file", "upload_folder", @@ -1159,6 +1145,7 @@ def __dir__(): ) from .file_download import ( _CACHED_NO_EXIST, # noqa: F401 + DryRunFileInfo, # noqa: F401 HfFileMetadata, # noqa: F401 get_hf_file_metadata, # noqa: F401 hf_hub_download, # noqa: F401 @@ -1237,7 +1224,6 @@ def __dir__(): get_safetensors_metadata, # noqa: F401 get_space_runtime, # noqa: F401 get_space_variables, # noqa: F401 - get_token_permission, # noqa: F401 get_user_overview, # noqa: F401 get_webhook, # noqa: F401 grant_access, # noqa: F401 @@ -1298,7 +1284,6 @@ def __dir__(): update_collection_metadata, # noqa: F401 update_inference_endpoint, # noqa: F401 update_repo_settings, # noqa: F401 - update_repo_visibility, # noqa: F401 update_webhook, # noqa: F401 upload_file, # noqa: F401 upload_folder, # noqa: F401 @@ -1485,13 +1470,6 @@ def __dir__(): ) from .inference._mcp.agent import Agent # noqa: F401 from .inference._mcp.mcp_client import MCPClient # noqa: F401 - from .inference_api import InferenceApi # noqa: F401 - from .keras_mixin import ( - KerasModelHubMixin, # noqa: F401 - from_pretrained_keras, # noqa: F401 - push_to_hub_keras, # noqa: F401 - save_pretrained_keras, # noqa: F401 - ) from .repocard import ( DatasetCard, # noqa: F401 ModelCard, # noqa: F401 @@ -1509,10 +1487,8 @@ def __dir__(): ModelCardData, # noqa: F401 SpaceCardData, # noqa: F401 ) - from .repository import Repository # noqa: F401 from .serialization import ( StateDictSplit, # noqa: F401 - get_tf_storage_size, # noqa: F401 get_torch_storage_id, # noqa: F401 get_torch_storage_size, # noqa: F401 load_state_dict_from_file, # noqa: F401 @@ -1520,7 +1496,6 @@ def __dir__(): save_torch_model, # noqa: F401 save_torch_state_dict, # noqa: F401 split_state_dict_into_shards_factory, # noqa: F401 - split_tf_state_dict_into_shards, # noqa: F401 split_torch_state_dict_into_shards, # noqa: F401 ) from .serialization._dduf import ( @@ -1530,6 +1505,8 @@ def __dir__(): read_dduf_file, # noqa: F401 ) from .utils import ( + ASYNC_CLIENT_FACTORY_T, # noqa: F401 + 
CLIENT_FACTORY_T, # noqa: F401 CachedFileInfo, # noqa: F401 CachedRepoInfo, # noqa: F401 CachedRevisionInfo, # noqa: F401 @@ -1537,12 +1514,15 @@ def __dir__(): CorruptedCacheException, # noqa: F401 DeleteCacheStrategy, # noqa: F401 HFCacheInfo, # noqa: F401 - HfFolder, # noqa: F401 cached_assets_path, # noqa: F401 - configure_http_backend, # noqa: F401 + close_session, # noqa: F401 dump_environment_info, # noqa: F401 + get_async_session, # noqa: F401 get_session, # noqa: F401 get_token, # noqa: F401 + hf_raise_for_status, # noqa: F401 logging, # noqa: F401 scan_cache_dir, # noqa: F401 + set_async_client_factory, # noqa: F401 + set_client_factory, # noqa: F401 ) diff --git a/src/huggingface_hub/_commit_api.py b/src/huggingface_hub/_commit_api.py index 7ed64b0e5e..ecd7e0a2b5 100644 --- a/src/huggingface_hub/_commit_api.py +++ b/src/huggingface_hub/_commit_api.py @@ -11,7 +11,7 @@ from dataclasses import dataclass, field from itertools import groupby from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, Literal, Optional, Union from tqdm.contrib.concurrent import thread_map @@ -236,7 +236,7 @@ def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]: config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s] >>> with operation.as_file(with_tqdm=True) as file: - ... requests.put(..., data=file) + ... httpx.put(..., data=file) config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s] ``` """ @@ -307,7 +307,7 @@ def _validate_path_in_repo(path_in_repo: str) -> str: CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete] -def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: +def _warn_on_overwriting_operations(operations: list[CommitOperation]) -> None: """ Warn user when a list of operations is expected to overwrite itself in a single commit. @@ -322,7 +322,7 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: delete before upload) but can happen if a user deletes an entire folder and then add new files to it. """ - nb_additions_per_path: Dict[str, int] = defaultdict(int) + nb_additions_per_path: dict[str, int] = defaultdict(int) for operation in operations: path_in_repo = operation.path_in_repo if isinstance(operation, CommitOperationAdd): @@ -356,10 +356,10 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None: @validate_hf_hub_args def _upload_files( *, - additions: List[CommitOperationAdd], + additions: list[CommitOperationAdd], repo_type: str, repo_id: str, - headers: Dict[str, str], + headers: dict[str, str], endpoint: Optional[str] = None, num_threads: int = 5, revision: Optional[str] = None, @@ -368,14 +368,14 @@ def _upload_files( """ Negotiates per-file transfer (LFS vs Xet) and uploads in batches. 
""" - xet_additions: List[CommitOperationAdd] = [] - lfs_actions: List[Dict] = [] - lfs_oid2addop: Dict[str, CommitOperationAdd] = {} + xet_additions: list[CommitOperationAdd] = [] + lfs_actions: list[dict[str, Any]] = [] + lfs_oid2addop: dict[str, CommitOperationAdd] = {} for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES): chunk_list = [op for op in chunk] - transfers: List[str] = ["basic", "multipart"] + transfers: list[str] = ["basic", "multipart"] has_buffered_io_data = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in chunk_list) if is_xet_available(): if not has_buffered_io_data: @@ -438,9 +438,9 @@ def _upload_files( @validate_hf_hub_args def _upload_lfs_files( *, - actions: List[Dict], - oid2addop: Dict[str, CommitOperationAdd], - headers: Dict[str, str], + actions: list[dict[str, Any]], + oid2addop: dict[str, CommitOperationAdd], + headers: dict[str, str], endpoint: Optional[str] = None, num_threads: int = 5, ): @@ -451,11 +451,11 @@ def _upload_lfs_files( - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md Args: - actions (`List[Dict]`): + actions (`list[dict[str, Any]]`): LFS batch actions returned by the server. - oid2addop (`Dict[str, CommitOperationAdd]`): + oid2addop (`dict[str, CommitOperationAdd]`): A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. endpoint (`str`, *optional*): The endpoint to use for the request. Defaults to `constants.ENDPOINT`. @@ -470,7 +470,7 @@ def _upload_lfs_files( repo_id (`str`): A namespace (user or an organization) and a repo name separated by a `/`. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. num_threads (`int`, *optional*): The number of concurrent threads to use when uploading. Defaults to 5. @@ -482,7 +482,7 @@ def _upload_lfs_files( If an upload failed for any reason [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) If the server returns malformed responses - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) + [`HfHubHTTPError`] If the LFS batch endpoint returned an HTTP error. """ # Filter out files already present upstream @@ -526,10 +526,10 @@ def _wrapped_lfs_upload(batch_action) -> None: @validate_hf_hub_args def _upload_xet_files( *, - additions: List[CommitOperationAdd], + additions: list[CommitOperationAdd], repo_type: str, repo_id: str, - headers: Dict[str, str], + headers: dict[str, str], endpoint: Optional[str] = None, revision: Optional[str] = None, create_pr: Optional[bool] = None, @@ -539,14 +539,14 @@ def _upload_xet_files( This chunks the files and deduplicates the chunks before uploading them to xetcas storage. Args: - additions (`List` of `CommitOperationAdd`): + additions (`` of `CommitOperationAdd`): The files to be uploaded. repo_type (`str`): Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`. repo_id (`str`): A namespace (user or an organization) and a repo name separated by a `/`. - headers (`Dict[str, str]`): + headers (`dict[str, str]`): Headers to use for the request, including authorization headers and user agent. endpoint: (`str`, *optional*): The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`. @@ -560,7 +560,7 @@ def _upload_xet_files( If an upload failed for any reason. 
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
            If the LFS batch endpoint returned an HTTP error.

    **How it works:**
@@ -615,7 +615,7 @@ def _upload_xet_files(
     xet_endpoint = xet_connection_info.endpoint
     access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)

-    def token_refresher() -> Tuple[str, int]:
+    def token_refresher() -> tuple[str, int]:
         new_xet_connection = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
             repo_id=repo_id,
@@ -688,7 +688,7 @@ def _fetch_upload_modes(
     additions: Iterable[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
     create_pr: bool = False,
@@ -707,7 +707,7 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
@@ -725,12 +725,12 @@
     endpoint = endpoint if endpoint is not None else constants.ENDPOINT

     # Fetch upload mode (LFS or regular) chunk by chunk.
-    upload_modes: Dict[str, UploadMode] = {}
-    should_ignore_info: Dict[str, bool] = {}
-    oid_info: Dict[str, Optional[str]] = {}
+    upload_modes: dict[str, UploadMode] = {}
+    should_ignore_info: dict[str, bool] = {}
+    oid_info: dict[str, Optional[str]] = {}

     for chunk in chunk_iterable(additions, 256):
-        payload: Dict = {
+        payload: dict = {
             "files": [
                 {
                     "path": op.path_in_repo,
@@ -773,10 +773,10 @@
     copies: Iterable[CommitOperationCopy],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
-) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+) -> dict[tuple[str, Optional[str]], Union["RepoFile", bytes]]:
     """
     Fetch information about the files to copy.
@@ -792,12 +792,12 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.

-    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
+    Returns: `dict[tuple[str, Optional[str]], Union[RepoFile, bytes]]`
         Key is the file path and revision of the file to copy.
         Value is the raw content as bytes (for regular files) or the file information as a RepoFile
         (for LFS files).
@@ -810,9 +810,9 @@
     from .hf_api import HfApi, RepoFolder

     hf_api = HfApi(endpoint=endpoint, headers=headers)
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
     # Store (path, revision) -> oid mapping
-    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}
+    oid_info: dict[tuple[str, Optional[str]], Optional[str]] = {}

     # 1. Fetch OIDs for destination paths in batches.
dest_paths = [op.path_in_repo for op in copies] for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE): @@ -872,11 +872,11 @@ def _fetch_files_to_copy( def _prepare_commit_payload( operations: Iterable[CommitOperation], - files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]], + files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]], commit_message: str, commit_description: Optional[str] = None, parent_commit: Optional[str] = None, -) -> Iterable[Dict[str, Any]]: +) -> Iterable[dict[str, Any]]: """ Builds the payload to POST to the `/commit` API of the Hub. diff --git a/src/huggingface_hub/_commit_scheduler.py b/src/huggingface_hub/_commit_scheduler.py index 1bc8db6a8a..497c9a0be5 100644 --- a/src/huggingface_hub/_commit_scheduler.py +++ b/src/huggingface_hub/_commit_scheduler.py @@ -7,7 +7,7 @@ from io import SEEK_END, SEEK_SET, BytesIO from pathlib import Path from threading import Lock, Thread -from typing import Dict, List, Optional, Union +from typing import Optional, Union from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi from .utils import filter_repo_objects @@ -53,9 +53,9 @@ class CommitScheduler: Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. token (`str`, *optional*): The token to use to commit to the repo. Defaults to the token saved on the machine. - allow_patterns (`List[str]` or `str`, *optional*): + allow_patterns (`list[str]` or `str`, *optional*): If provided, only files matching at least one pattern are uploaded. - ignore_patterns (`List[str]` or `str`, *optional*): + ignore_patterns (`list[str]` or `str`, *optional*): If provided, files matching any of the patterns are not uploaded. squash_history (`bool`, *optional*): Whether to squash the history of the repo after each commit. Defaults to `False`. 
Squashing commits is @@ -108,8 +108,8 @@ def __init__( revision: Optional[str] = None, private: Optional[bool] = None, token: Optional[str] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, squash_history: bool = False, hf_api: Optional["HfApi"] = None, ) -> None: @@ -138,7 +138,7 @@ def __init__( self.token = token # Keep track of already uploaded files - self.last_uploaded: Dict[Path, float] = {} # key is local path, value is timestamp + self.last_uploaded: dict[Path, float] = {} # key is local path, value is timestamp # Scheduler if not every > 0: @@ -229,7 +229,7 @@ def push_to_hub(self) -> Optional[CommitInfo]: prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else "" # Filter with pattern + filter out unchanged files + retrieve current file size - files_to_upload: List[_FileToUpload] = [] + files_to_upload: list[_FileToUpload] = [] for relpath in filter_repo_objects( relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns ): @@ -312,10 +312,13 @@ def __len__(self) -> int: return self._size_limit def __getattribute__(self, name: str): - if name.startswith("_") or name in ("read", "tell", "seek"): # only 3 public methods supported + if name.startswith("_") or name in ("read", "tell", "seek", "fileno"): # only 4 public methods supported return super().__getattribute__(name) raise NotImplementedError(f"PartialFileIO does not support '{name}'.") + def fileno(self): + raise AttributeError("PartialFileIO does not have a fileno.") + def tell(self) -> int: """Return the current file position.""" return self._file.tell() diff --git a/src/huggingface_hub/_inference_endpoints.py b/src/huggingface_hub/_inference_endpoints.py index 37f772bfbe..4422cac7c3 100644 --- a/src/huggingface_hub/_inference_endpoints.py +++ b/src/huggingface_hub/_inference_endpoints.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum -from typing import TYPE_CHECKING, Dict, Optional, Union +from typing import TYPE_CHECKING, Optional, Union from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError @@ -62,7 +62,7 @@ class InferenceEndpoint: The timestamp of the last update of the Inference Endpoint. type ([`InferenceEndpointType`]): The type of the Inference Endpoint (public, protected, private). - raw (`Dict`): + raw (`dict`): The raw dictionary data returned from the API. token (`str` or `bool`, *optional*): Authentication token for the Inference Endpoint, if set when requesting the API. 
Will default to the @@ -112,7 +112,7 @@ class InferenceEndpoint: type: InferenceEndpointType = field(repr=False, init=False) # Raw dict from the API - raw: Dict = field(repr=False) + raw: dict = field(repr=False) # Internal fields _token: Union[str, bool, None] = field(repr=False, compare=False) @@ -120,7 +120,7 @@ class InferenceEndpoint: @classmethod def from_raw( - cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None + cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None ) -> "InferenceEndpoint": """Initialize object from raw dictionary.""" if api is None: @@ -260,8 +260,8 @@ def update( framework: Optional[str] = None, revision: Optional[str] = None, task: Optional[str] = None, - custom_image: Optional[Dict] = None, - secrets: Optional[Dict[str, str]] = None, + custom_image: Optional[dict] = None, + secrets: Optional[dict[str, str]] = None, ) -> "InferenceEndpoint": """Update the Inference Endpoint. @@ -293,10 +293,10 @@ def update( The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). task (`str`, *optional*): The task on which to deploy the model (e.g. `"text-classification"`). - custom_image (`Dict`, *optional*): + custom_image (`dict`, *optional*): A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). - secrets (`Dict[str, str]`, *optional*): + secrets (`dict[str, str]`, *optional*): Secret values to inject in the container environment. Returns: [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data. diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 623fd9dc9d..c85324ce1c 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Optional, Union from huggingface_hub import constants from huggingface_hub._space_api import SpaceHardware @@ -71,13 +71,13 @@ class JobInfo: space_id (`str` or `None`): The Docker image from Hugging Face Spaces used for the Job. Can be None if docker_image is present instead. - command (`List[str]` or `None`): + command (`list[str]` or `None`): Command of the Job, e.g. `["python", "-c", "print('hello world')"]` - arguments (`List[str]` or `None`): + arguments (`list[str]` or `None`): Arguments passed to the command - environment (`Dict[str]` or `None`): + environment (`dict[str]` or `None`): Environment variables of the Job as a dictionary. - secrets (`Dict[str]` or `None`): + secrets (`dict[str]` or `None`): Secret environment variables of the Job (encrypted). flavor (`str` or `None`): Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. 
@@ -111,10 +111,10 @@ class JobInfo: created_at: Optional[datetime] docker_image: Optional[str] space_id: Optional[str] - command: Optional[List[str]] - arguments: Optional[List[str]] - environment: Optional[Dict[str, Any]] - secrets: Optional[Dict[str, Any]] + command: Optional[list[str]] + arguments: Optional[list[str]] + environment: Optional[dict[str, Any]] + secrets: Optional[dict[str, Any]] flavor: Optional[SpaceHardware] status: JobStatus owner: JobOwner @@ -148,13 +148,13 @@ def __init__(self, **kwargs) -> None: class JobSpec: docker_image: Optional[str] space_id: Optional[str] - command: Optional[List[str]] - arguments: Optional[List[str]] - environment: Optional[Dict[str, Any]] - secrets: Optional[Dict[str, Any]] + command: Optional[list[str]] + arguments: Optional[list[str]] + environment: Optional[dict[str, Any]] + secrets: Optional[dict[str, Any]] flavor: Optional[SpaceHardware] timeout: Optional[int] - tags: Optional[List[str]] + tags: Optional[list[str]] arch: Optional[str] def __init__(self, **kwargs) -> None: @@ -202,7 +202,7 @@ class ScheduledJobInfo: Scheduled Job ID. created_at (`datetime` or `None`): When the scheduled Job was created. - tags (`List[str]` or `None`): + tags (`list[str]` or `None`): The tags of the scheduled Job. schedule (`str` or `None`): One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a @@ -263,14 +263,14 @@ def __init__(self, **kwargs) -> None: def _create_job_spec( *, image: str, - command: List[str], - env: Optional[Dict[str, Any]], - secrets: Optional[Dict[str, Any]], + command: list[str], + env: Optional[dict[str, Any]], + secrets: Optional[dict[str, Any]], flavor: Optional[SpaceHardware], timeout: Optional[Union[int, float, str]], -) -> Dict[str, Any]: +) -> dict[str, Any]: # prepare job spec to send to HF Jobs API - job_spec: Dict[str, Any] = { + job_spec: dict[str, Any] = { "command": command, "arguments": [], "environment": env or {}, diff --git a/src/huggingface_hub/_login.py b/src/huggingface_hub/_login.py index 8f721b6834..7700ab5b23 100644 --- a/src/huggingface_hub/_login.py +++ b/src/huggingface_hub/_login.py @@ -20,8 +20,8 @@ from typing import Optional from . import constants -from .commands._cli_utils import ANSI from .utils import ( + ANSI, capture_output, get_token, is_google_colab, @@ -41,7 +41,6 @@ _save_token, get_stored_tokens, ) -from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args logger = logging.get_logger(__name__) @@ -55,18 +54,11 @@ """ -@_deprecate_arguments( - version="1.0", - deprecated_args="write_permission", - custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.", -) -@_deprecate_positional_args(version="1.0") def login( token: Optional[str] = None, *, add_to_git_credential: bool = False, - new_session: bool = True, - write_permission: bool = False, + skip_if_logged_in: bool = False, ) -> None: """Login the machine to access the Hub. @@ -96,10 +88,8 @@ def login( is configured, a warning will be displayed to the user. If `token` is `None`, the value of `add_to_git_credential` is ignored and will be prompted again to the end user. - new_session (`bool`, defaults to `True`): - If `True`, will request a token even if one is already saved on the machine. - write_permission (`bool`): - Ignored and deprecated argument. + skip_if_logged_in (`bool`, defaults to `False`): + If `True`, do not prompt for token if user is already logged in. 
Raises:
     [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
         If an organization token is passed. Only personal account tokens are valid
@@ -119,9 +109,9 @@
         )
         _login(token, add_to_git_credential=add_to_git_credential)
     elif is_notebook():
-        notebook_login(new_session=new_session)
+        notebook_login(skip_if_logged_in=skip_if_logged_in)
     else:
-        interpreter_login(new_session=new_session)
+        interpreter_login(skip_if_logged_in=skip_if_logged_in)


 def logout(token_name: Optional[str] = None) -> None:
@@ -236,13 +226,7 @@ def auth_list() -> None:
 ###


-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a prompt to log in to the HF website and store the token.

@@ -253,16 +237,14 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
     For more details, see [`login`].

     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
-    if not new_session and get_token() is not None:
+    if skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return

-    from .commands.delete_cache import _ask_for_confirmation_no_tui
+    from .cli.cache import _ask_for_confirmation_no_tui

     print(_HF_LOGO_ASCII)
     if get_token() is not None:
@@ -308,13 +290,7 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
 notebooks.
 """


-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def notebook_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a widget to log in to the HF website and store the token.

@@ -325,10 +301,8 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
     For more details, see [`login`].

     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
     try:
         import ipywidgets.widgets as widgets  # type: ignore
@@ -338,7 +312,7 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
         "The `notebook_login` function can only be used in a notebook (Jupyter or"
         " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
 )
-    if not new_session and get_token() is not None:
+    if skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return

diff --git a/src/huggingface_hub/_oauth.py b/src/huggingface_hub/_oauth.py
index 9f8eb60796..7bdfa6a058 100644
--- a/src/huggingface_hub/_oauth.py
+++ b/src/huggingface_hub/_oauth.py
@@ -6,7 +6,7 @@
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Literal, Optional, Union

 from . import constants
 from .hf_api import whoami
@@ -39,7 +39,7 @@ class OAuthOrgInfo:
         Whether the org has a payment method set up. Hugging Face field.
     role_in_org (`Optional[str]`, *optional*):
         The user's role in the org. Hugging Face field.
-    security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+    security_restrictions (`Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
         Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """

@@ -50,7 +50,7 @@
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
+    security_restrictions: Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]] = None


 @dataclass
@@ -79,7 +79,7 @@ class OAuthUserInfo:
         Whether the user is a pro user. Hugging Face field.
     can_pay (`Optional[bool]`, *optional*):
         Whether the user has a payment method set up. Hugging Face field.
-    orgs (`Optional[List[OrgInfo]]`, *optional*):
+    orgs (`Optional[list[OrgInfo]]`, *optional*):
         List of organizations the user is part of. Hugging Face field.
""" @@ -93,7 +93,7 @@ class OAuthUserInfo: website: Optional[str] is_pro: bool can_pay: Optional[bool] - orgs: Optional[List[OAuthOrgInfo]] + orgs: Optional[list[OAuthOrgInfo]] @dataclass @@ -306,7 +306,7 @@ async def oauth_redirect_callback(request: fastapi.Request) -> RedirectResponse: target_url = request.query_params.get("_target_url") # Build redirect URI with the same query params as before and bump nb_redirects count - query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1} + query_params: dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1} if target_url: query_params["_target_url"] = target_url @@ -406,7 +406,7 @@ def _get_redirect_target(request: "fastapi.Request", default_target: str = "/") return request.query_params.get("_target_url", default_target) -def _get_mocked_oauth_info() -> Dict: +def _get_mocked_oauth_info() -> dict: token = get_token() if token is None: raise ValueError( @@ -449,7 +449,7 @@ def _get_mocked_oauth_info() -> Dict: } -def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]: +def _get_oauth_uris(route_prefix: str = "/") -> tuple[str, str, str]: route_prefix = route_prefix.strip("/") if route_prefix: route_prefix = f"/{route_prefix}" diff --git a/src/huggingface_hub/_snapshot_download.py b/src/huggingface_hub/_snapshot_download.py index 0db8a29f7e..9b5d5cfbff 100644 --- a/src/huggingface_hub/_snapshot_download.py +++ b/src/huggingface_hub/_snapshot_download.py @@ -1,20 +1,21 @@ import os from pathlib import Path -from typing import Dict, Iterable, List, Literal, Optional, Type, Union +from typing import Iterable, List, Literal, Optional, Union, overload -import requests +import httpx from tqdm.auto import tqdm as base_tqdm from tqdm.contrib.concurrent import thread_map from . import constants from .errors import ( + DryRunError, GatedRepoError, HfHubHTTPError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError, ) -from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name +from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args from .utils import tqdm as hf_tqdm @@ -25,6 +26,81 @@ VERY_LARGE_REPO_THRESHOLD = 50000 # After this limit, we don't consider `repo_info.siblings` to be reliable enough +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: Literal[False] = False, +) -> str: ... 
+ + +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: Literal[True] = True, +) -> list[DryRunFileInfo]: ... + + +@overload +def snapshot_download( + repo_id: str, + *, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + cache_dir: Union[str, Path, None] = None, + local_dir: Union[str, Path, None] = None, + library_name: Optional[str] = None, + library_version: Optional[str] = None, + user_agent: Optional[Union[dict, str]] = None, + etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, + force_download: bool = False, + token: Optional[Union[bool, str]] = None, + local_files_only: bool = False, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, + max_workers: int = 8, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, + endpoint: Optional[str] = None, + dry_run: bool = False, +) -> Union[str, list[DryRunFileInfo]]: ... + + @validate_hf_hub_args def snapshot_download( repo_id: str, @@ -35,22 +111,19 @@ def snapshot_download( local_dir: Union[str, Path, None] = None, library_name: Optional[str] = None, library_version: Optional[str] = None, - user_agent: Optional[Union[Dict, str]] = None, - proxies: Optional[Dict] = None, + user_agent: Optional[Union[dict, str]] = None, etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, force_download: bool = False, token: Optional[Union[bool, str]] = None, local_files_only: bool = False, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, max_workers: int = 8, - tqdm_class: Optional[Type[base_tqdm]] = None, - headers: Optional[Dict[str, str]] = None, + tqdm_class: Optional[type[base_tqdm]] = None, + headers: Optional[dict[str, str]] = None, endpoint: Optional[str] = None, - # Deprecated args - local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", - resume_download: Optional[bool] = None, -) -> str: + dry_run: bool = False, +) -> Union[str, list[DryRunFileInfo]]: """Download repo files. Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from @@ -85,12 +158,9 @@ def snapshot_download( The version of the library. user_agent (`str`, `dict`, *optional*): The user-agent info in the form of a dictionary or a string. - proxies (`dict`, *optional*): - Dictionary mapping protocol to the URL of the proxy passed to - `requests.request`. etag_timeout (`float`, *optional*, defaults to `10`): When fetching ETag, how many seconds to wait for the server to send - data before giving up which is passed to `requests.request`. + data before giving up which is passed to `httpx.request`. 
force_download (`bool`, *optional*, defaults to `False`): Whether the file should be downloaded even if it already exists in the local cache. token (`str`, `bool`, *optional*): @@ -103,9 +173,9 @@ def snapshot_download( local_files_only (`bool`, *optional*, defaults to `False`): If `True`, avoid downloading the file and return the path to the local cached file if it exists. - allow_patterns (`List[str]` or `str`, *optional*): + allow_patterns (`list[str]` or `str`, *optional*): If provided, only files matching at least one pattern are downloaded. - ignore_patterns (`List[str]` or `str`, *optional*): + ignore_patterns (`list[str]` or `str`, *optional*): If provided, files matching any of the patterns are not downloaded. max_workers (`int`, *optional*): Number of concurrent threads to download files (1 thread = 1 file download). @@ -116,9 +186,14 @@ def snapshot_download( Note that the `tqdm_class` is not passed to each individual download. Defaults to the custom HF progress bar that can be disabled by setting `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. + dry_run (`bool`, *optional*, defaults to `False`): + If `True`, perform a dry run without actually downloading the files. Returns a list of + [`DryRunFileInfo`] objects containing information about what would be downloaded. Returns: - `str`: folder path of the repo snapshot. + `str` or list of [`DryRunFileInfo`]: + - If `dry_run=False`: Local snapshot path. + - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information. Raises: [`~utils.RepositoryNotFoundError`] @@ -163,14 +238,10 @@ def snapshot_download( try: # if we have internet connection we want to list files to download repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision) - except (requests.exceptions.SSLError, requests.exceptions.ProxyError): - # Actually raise for those subclasses of ConnectionError + except httpx.ProxyError: + # Actually raise on proxy error raise - except ( - requests.exceptions.ConnectionError, - requests.exceptions.Timeout, - OfflineModeIsEnabled, - ) as error: + except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error: # Internet connection is down # => will try to use local files only api_call_error = error @@ -178,7 +249,7 @@ def snapshot_download( except RevisionNotFoundError: # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted) raise - except requests.HTTPError as error: + except HfHubHTTPError as error: # Multiple reasons for an http error: # - Repository is private and invalid/missing token sent # - Repository is gated and invalid/missing token sent @@ -198,6 +269,11 @@ def snapshot_download( # - f the specified revision is a branch or tag, look inside "refs". # => if local_dir is not None, we will return the path to the local folder if it exists. if repo_info is None: + if dry_run: + raise DryRunError( + "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token." + ) from api_call_error + # Try to get which commit hash corresponds to the specified revision commit_hash = None if REGEX_COMMIT_HASH.match(revision): @@ -284,6 +360,8 @@ def snapshot_download( tqdm_desc = f"Fetching {len(filtered_repo_files)} files" else: tqdm_desc = "Fetching ... 
files" + if dry_run: + tqdm_desc = "[dry-run] " + tqdm_desc commit_hash = repo_info.sha snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash) @@ -299,31 +377,33 @@ def snapshot_download( except OSError as e: logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.") + results: List[Union[str, DryRunFileInfo]] = [] + # we pass the commit_hash to hf_hub_download # so no network call happens if we already # have the file locally. - def _inner_hf_hub_download(repo_file: str): - return hf_hub_download( - repo_id, - filename=repo_file, - repo_type=repo_type, - revision=commit_hash, - endpoint=endpoint, - cache_dir=cache_dir, - local_dir=local_dir, - local_dir_use_symlinks=local_dir_use_symlinks, - library_name=library_name, - library_version=library_version, - user_agent=user_agent, - proxies=proxies, - etag_timeout=etag_timeout, - resume_download=resume_download, - force_download=force_download, - token=token, - headers=headers, + def _inner_hf_hub_download(repo_file: str) -> None: + results.append( + hf_hub_download( # type: ignore[no-matching-overload] # ty not happy, don't know why :/ + repo_id, + filename=repo_file, + repo_type=repo_type, + revision=commit_hash, + endpoint=endpoint, + cache_dir=cache_dir, + local_dir=local_dir, + library_name=library_name, + library_version=library_version, + user_agent=user_agent, + etag_timeout=etag_timeout, + force_download=force_download, + token=token, + headers=headers, + dry_run=dry_run, + ) ) - if constants.HF_HUB_ENABLE_HF_TRANSFER: + if constants.HF_HUB_ENABLE_HF_TRANSFER and not dry_run: # when using hf_transfer we don't want extra parallelism # from the one hf_transfer provides for file in filtered_repo_files: @@ -338,6 +418,10 @@ def _inner_hf_hub_download(repo_file: str): tqdm_class=tqdm_class or hf_tqdm, ) + if dry_run: + assert all(isinstance(r, DryRunFileInfo) for r in results) + return results # type: ignore + if local_dir is not None: return str(os.path.realpath(local_dir)) return snapshot_folder diff --git a/src/huggingface_hub/_space_api.py b/src/huggingface_hub/_space_api.py index 05fccfbc1e..6dd7976329 100644 --- a/src/huggingface_hub/_space_api.py +++ b/src/huggingface_hub/_space_api.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from typing import Dict, Optional +from typing import Optional from huggingface_hub.utils import parse_datetime @@ -128,9 +128,9 @@ class SpaceRuntime: requested_hardware: Optional[SpaceHardware] sleep_time: Optional[int] storage: Optional[SpaceStorage] - raw: Dict + raw: dict - def __init__(self, data: Dict) -> None: + def __init__(self, data: dict) -> None: self.stage = data["stage"] self.hardware = data.get("hardware", {}).get("current") self.requested_hardware = data.get("hardware", {}).get("requested") @@ -160,7 +160,7 @@ class SpaceVariable: description: Optional[str] updated_at: Optional[datetime] - def __init__(self, key: str, values: Dict) -> None: + def __init__(self, key: str, values: dict) -> None: self.key = key self.value = values["value"] self.description = values.get("description") diff --git a/src/huggingface_hub/_tensorboard_logger.py b/src/huggingface_hub/_tensorboard_logger.py index 4d9581d8ee..2783a25001 100644 --- a/src/huggingface_hub/_tensorboard_logger.py +++ b/src/huggingface_hub/_tensorboard_logger.py @@ -14,7 +14,7 @@ """Contains a logger to push training logs to the Hub, using Tensorboard.""" from pathlib import Path -from typing import List, Optional, Union +from typing import 
Optional, Union from ._commit_scheduler import CommitScheduler from .errors import EntryNotFoundError @@ -74,10 +74,10 @@ class HFSummaryWriter(_RuntimeSummaryWriter): Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. path_in_repo (`str`, *optional*): The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/". - repo_allow_patterns (`List[str]` or `str`, *optional*): + repo_allow_patterns (`list[str]` or `str`, *optional*): A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details. - repo_ignore_patterns (`List[str]` or `str`, *optional*): + repo_ignore_patterns (`list[str]` or `str`, *optional*): A list of patterns to exclude in the upload. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details. token (`str`, *optional*): @@ -134,8 +134,8 @@ def __init__( repo_revision: Optional[str] = None, repo_private: Optional[bool] = None, path_in_repo: Optional[str] = "tensorboard", - repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*", - repo_ignore_patterns: Optional[Union[List[str], str]] = None, + repo_allow_patterns: Optional[Union[list[str], str]] = "*.tfevents.*", + repo_ignore_patterns: Optional[Union[list[str], str]] = None, token: Optional[str] = None, **kwargs, ): diff --git a/src/huggingface_hub/_upload_large_folder.py b/src/huggingface_hub/_upload_large_folder.py index 1ccbc07d39..083b62f544 100644 --- a/src/huggingface_hub/_upload_large_folder.py +++ b/src/huggingface_hub/_upload_large_folder.py @@ -24,15 +24,14 @@ from datetime import datetime from pathlib import Path from threading import Lock -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Optional, Union from urllib.parse import quote from . import constants from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata from .constants import DEFAULT_REVISION, REPO_TYPES -from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm -from .utils._cache_manager import _format_size +from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm from .utils._runtime import is_xet_available from .utils.sha import sha_fileobj @@ -44,7 +43,7 @@ WAITING_TIME_IF_NO_TASKS = 10 # seconds MAX_NB_FILES_FETCH_UPLOAD_MODE = 100 -COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000] +COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000] UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload @@ -56,7 +55,7 @@ RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size -def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None: +def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None: """ Validate upload against repository limits and warn about potential issues. 
@@ -85,7 +84,7 @@ def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None: # Track immediate children (files and subdirs) for each folder from collections import defaultdict - entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()}) + entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()}) for paths in paths_list: path = Path(paths.path_in_repo) @@ -160,8 +159,8 @@ def upload_large_folder_internal( repo_type: str, # Repo type is required! revision: Optional[str] = None, private: Optional[bool] = None, - allow_patterns: Optional[Union[List[str], str]] = None, - ignore_patterns: Optional[Union[List[str], str]] = None, + allow_patterns: Optional[Union[list[str], str]] = None, + ignore_patterns: Optional[Union[list[str], str]] = None, num_workers: Optional[int] = None, print_report: bool = True, print_report_every: int = 60, @@ -284,13 +283,13 @@ class WorkerJob(enum.Enum): WAIT = enum.auto() # if no tasks are available but we don't want to exit -JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata] +JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata] class LargeUploadStatus: """Contains information, queues and tasks for a large upload process.""" - def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1): + def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1): self.items = items self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue() self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue() @@ -423,7 +422,7 @@ def _worker_job( Read `upload_large_folder` docstring for more information on how tasks are prioritized. """ while True: - next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None + next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None # Determine next task next_job = _determine_next_job(status) @@ -516,7 +515,7 @@ def _worker_job( status.nb_workers_waiting -= 1 -def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]: +def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]: with status.lock: # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file) if ( @@ -639,7 +638,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None: metadata.save(paths) -def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Get upload mode for each file and update metadata. Also receive info if the file should be ignored. 
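# --- Editorial aside (not part of the patch) ---------------------------------
# The pervasive `List`/`Dict`/`Tuple` -> `list`/`dict`/`tuple` changes in these
# hunks are the PEP 585 migration: since Python 3.9 the builtin containers are
# subscriptable as generics, so the `typing` aliases can be dropped. A minimal
# before/after illustration:
#
from typing import Optional  # still needed for Optional (and Union)

counts: dict[str, int] = {}        # previously: Dict[str, int]
batch: list[tuple[str, int]] = []  # previously: List[Tuple[str, int]]
maybe_rev: Optional[str] = None    # unchanged by the migration
# ------------------------------------------------------------------------------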
@@ -661,7 +660,7 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t metadata.save(paths) -def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Preupload LFS files and update metadata.""" additions = [_build_hacky_operation(item) for item in items] api.preupload_lfs_files( @@ -676,7 +675,7 @@ def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_typ metadata.save(paths) -def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: +def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None: """Commit files to the repo.""" additions = [_build_hacky_operation(item) for item in items] api.create_commit( @@ -721,11 +720,11 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd: #################### -def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]: +def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]: return [queue.get()] -def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]: +def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]: return [queue.get() for _ in range(min(queue.qsize(), n))] diff --git a/src/huggingface_hub/_webhooks_payload.py b/src/huggingface_hub/_webhooks_payload.py index 288f4b08b9..90f12425cb 100644 --- a/src/huggingface_hub/_webhooks_payload.py +++ b/src/huggingface_hub/_webhooks_payload.py @@ -14,7 +14,7 @@ # limitations under the License. """Contains data structures to parse the webhooks payload.""" -from typing import List, Literal, Optional +from typing import Literal, Optional from .utils import is_pydantic_available @@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId): name: str private: bool subdomain: Optional[str] = None - tags: Optional[List[str]] = None + tags: Optional[list[str]] = None type: Literal["dataset", "model", "space"] url: WebhookPayloadUrl @@ -134,4 +134,4 @@ class WebhookPayload(BaseModel): comment: Optional[WebhookPayloadComment] = None webhook: WebhookPayloadWebhook movedTo: Optional[WebhookPayloadMovedTo] = None - updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None + updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None diff --git a/src/huggingface_hub/_webhooks_server.py b/src/huggingface_hub/_webhooks_server.py index b1a89c37cd..6c761fd977 100644 --- a/src/huggingface_hub/_webhooks_server.py +++ b/src/huggingface_hub/_webhooks_server.py @@ -18,7 +18,7 @@ import inspect import os from functools import wraps -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional from .utils import experimental, is_fastapi_available, is_gradio_available @@ -109,7 +109,7 @@ def __init__( self._ui = ui self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET") - self.registered_webhooks: Dict[str, Callable] = {} + self.registered_webhooks: dict[str, Callable] = {} _warn_on_empty_secret(self.webhook_secret) def add_webhook(self, path: Optional[str] = None) -> Callable: diff --git a/src/huggingface_hub/cli/__init__.py b/src/huggingface_hub/cli/__init__.py index 7a1a8d793b..8568c82be1 100644 --- a/src/huggingface_hub/cli/__init__.py +++ b/src/huggingface_hub/cli/__init__.py @@ -11,17 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - -from abc import ABC, abstractmethod -from argparse import _SubParsersAction - - -class BaseHuggingfaceCLICommand(ABC): - @staticmethod - @abstractmethod - def register_subcommand(parser: _SubParsersAction): - raise NotImplementedError() - - @abstractmethod - def run(self): - raise NotImplementedError() diff --git a/src/huggingface_hub/cli/_cli_utils.py b/src/huggingface_hub/cli/_cli_utils.py index bd56ad6896..de34f1973c 100644 --- a/src/huggingface_hub/cli/_cli_utils.py +++ b/src/huggingface_hub/cli/_cli_utils.py @@ -11,59 +11,163 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Contains a utility for good-looking prints.""" +"""Contains CLI utilities (styling, helpers).""" +import importlib.metadata import os -from typing import List, Union +import time +from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Optional +import click +import typer -class ANSI: +from huggingface_hub import __version__, constants +from huggingface_hub.utils import ANSI, get_session, hf_raise_for_status, installation_method, logging + + +logger = logging.get_logger() + + +if TYPE_CHECKING: + from huggingface_hub.hf_api import HfApi + + +def get_hf_api(token: Optional[str] = None) -> "HfApi": + # Import here to avoid circular import + from huggingface_hub.hf_api import HfApi + + return HfApi(token=token, library_name="hf", library_version=__version__) + + +#### TYPER UTILS + + +class AlphabeticalMixedGroup(typer.core.TyperGroup): """ - Helper for en.wikipedia.org/wiki/ANSI_escape_code + Typer Group that lists commands and sub-apps mixed and alphabetically. """ - _bold = "\u001b[1m" - _gray = "\u001b[90m" - _red = "\u001b[31m" - _reset = "\u001b[0m" - _yellow = "\u001b[33m" + def list_commands(self, ctx: click.Context) -> list[str]: # type: ignore[name-defined] + # click.Group stores both commands and sub-groups in `self.commands` + return sorted(self.commands.keys()) + + +def typer_factory(help: str) -> typer.Typer: + return typer.Typer( + help=help, + add_completion=True, + no_args_is_help=True, + cls=AlphabeticalMixedGroup, + # Disable rich completely for consistent experience + rich_markup_mode=None, + rich_help_panel=None, + pretty_exceptions_enable=False, + ) + - @classmethod - def bold(cls, s: str) -> str: - return cls._format(s, cls._bold) +class RepoType(str, Enum): + model = "model" + dataset = "dataset" + space = "space" - @classmethod - def gray(cls, s: str) -> str: - return cls._format(s, cls._gray) - @classmethod - def red(cls, s: str) -> str: - return cls._format(s, cls._bold + cls._red) +RepoIdArg = Annotated[ + str, + typer.Argument( + help="The ID of the repo (e.g. 
`username/repo-name`).", + ), +] - @classmethod - def yellow(cls, s: str) -> str: - return cls._format(s, cls._yellow) - @classmethod - def _format(cls, s: str, code: str) -> str: - if os.environ.get("NO_COLOR"): - # See https://no-color.org/ - return s - return f"{code}{s}{cls._reset}" +RepoTypeOpt = Annotated[ + RepoType, + typer.Option( + help="The type of repository (model, dataset, or space).", + ), +] +TokenOpt = Annotated[ + Optional[str], + typer.Option( + help="A User Access Token generated from https://huggingface.co/settings/tokens.", + ), +] -def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: +PrivateOpt = Annotated[ + bool, + typer.Option( + help="Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists.", + ), +] + +RevisionOpt = Annotated[ + Optional[str], + typer.Option( + help="Git revision id which can be a branch name, a tag, or a commit hash.", + ), +] + + +### PyPI VERSION CHECKER + + +def check_cli_update() -> None: """ - Inspired by: + Check whether a newer version of `huggingface_hub` is available on PyPI. + + If a newer version is found, notify the user and suggest updating. + If current version is a pre-release (e.g. `1.0.0.rc1`), or a dev version (e.g. `1.0.0.dev1`), no check is performed. - - stackoverflow.com/a/8356620/593036 - - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data + This function is called at the entry point of the CLI. It only performs the check once every 24 hours, and any error + during the check is caught and logged, to avoid breaking the CLI. """ - col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] - row_format = ("{{:{}}} " * len(headers)).format(*col_widths) - lines = [] - lines.append(row_format.format(*headers)) - lines.append(row_format.format(*["-" * w for w in col_widths])) - for row in rows: - lines.append(row_format.format(*row)) - return "\n".join(lines) + try: + _check_cli_update() + except Exception: + # We don't want the CLI to fail on version checks, no matter the reason. 
+ logger.debug("Error while checking for CLI update.", exc_info=True) + + +def _check_cli_update() -> None: + current_version = importlib.metadata.version("huggingface_hub") + + # Skip if current version is a pre-release or dev version + if any(tag in current_version for tag in ["rc", "dev"]): + return + + # Skip if already checked in the last 24 hours + if os.path.exists(constants.CHECK_FOR_UPDATE_DONE_PATH): + mtime = os.path.getmtime(constants.CHECK_FOR_UPDATE_DONE_PATH) + if (time.time() - mtime) < 24 * 3600: + return + + # Touch the file to mark that we did the check now + Path(constants.CHECK_FOR_UPDATE_DONE_PATH).touch() + + # Check latest version from PyPI + response = get_session().get("https://pypi.org/pypi/huggingface_hub/json", timeout=2) + hf_raise_for_status(response) + data = response.json() + latest_version = data["info"]["version"] + + # If latest version is different from current, notify user + if current_version != latest_version: + method = installation_method() + if method == "brew": + update_command = "brew upgrade huggingface-cli" + elif method == "hf_installer" and os.name == "nt": + update_command = 'powershell -NoProfile -Command "iwr -useb https://hf.co/cli/install.ps1 | iex"' + elif method == "hf_installer": + update_command = "curl -LsSf https://hf.co/cli/install.sh | sh -" + else: # unknown => likely pip + update_command = "pip install -U huggingface_hub" + + click.echo( + ANSI.yellow( + f"A new version of huggingface_hub ({latest_version}) is available! " + f"You are using version {current_version}.\n" + f"To update, run: {ANSI.bold(update_command)}\n", + ) + ) diff --git a/src/huggingface_hub/cli/auth.py b/src/huggingface_hub/cli/auth.py index bbf475a4f8..cb522c918c 100644 --- a/src/huggingface_hub/cli/auth.py +++ b/src/huggingface_hub/cli/auth.py @@ -30,18 +30,17 @@ hf auth whoami """ -from argparse import _SubParsersAction -from typing import List, Optional +from typing import Annotated, Optional -from requests.exceptions import HTTPError +import typer -from huggingface_hub.commands import BaseHuggingfaceCLICommand from huggingface_hub.constants import ENDPOINT -from huggingface_hub.hf_api import HfApi +from huggingface_hub.errors import HfHubHTTPError +from huggingface_hub.hf_api import whoami from .._login import auth_list, auth_switch, login, logout -from ..utils import get_stored_tokens, get_token, logging -from ._cli_utils import ANSI +from ..utils import ANSI, get_stored_tokens, get_token, logging +from ._cli_utils import TokenOpt, typer_factory logger = logging.get_logger(__name__) @@ -55,125 +54,42 @@ _inquirer_py_available = False -class AuthCommands(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - # Create the main 'auth' command - auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).") - auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands") - - # Show help if no subcommand is provided - auth_parser.set_defaults(func=lambda args: auth_parser.print_help()) - - # Add 'login' as a subcommand of 'auth' - login_parser = auth_subparsers.add_parser( - "login", help="Log in using a token from huggingface.co/settings/tokens" - ) - login_parser.add_argument( - "--token", - type=str, - help="Token generated from https://huggingface.co/settings/tokens", - ) - login_parser.add_argument( - "--add-to-git-credential", - action="store_true", - help="Optional: Save token to git credential helper.", - ) - login_parser.set_defaults(func=lambda args: 
AuthLogin(args)) - - # Add 'logout' as a subcommand of 'auth' - logout_parser = auth_subparsers.add_parser("logout", help="Log out") - logout_parser.add_argument( - "--token-name", - type=str, - help="Optional: Name of the access token to log out from.", - ) - logout_parser.set_defaults(func=lambda args: AuthLogout(args)) - - # Add 'whoami' as a subcommand of 'auth' - whoami_parser = auth_subparsers.add_parser( - "whoami", help="Find out which huggingface.co account you are logged in as." - ) - whoami_parser.set_defaults(func=lambda args: AuthWhoami(args)) - - # Existing subcommands - auth_switch_parser = auth_subparsers.add_parser("switch", help="Switch between access tokens") - auth_switch_parser.add_argument( - "--token-name", - type=str, - help="Optional: Name of the access token to switch to.", - ) - auth_switch_parser.add_argument( - "--add-to-git-credential", - action="store_true", - help="Optional: Save token to git credential helper.", - ) - auth_switch_parser.set_defaults(func=lambda args: AuthSwitch(args)) - - auth_list_parser = auth_subparsers.add_parser("list", help="List all stored access tokens") - auth_list_parser.set_defaults(func=lambda args: AuthList(args)) - - -class BaseAuthCommand: - def __init__(self, args): - self.args = args - self._api = HfApi() - - -class AuthLogin(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - login( - token=self.args.token, - add_to_git_credential=self.args.add_to_git_credential, - ) - - -class AuthLogout(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - logout(token_name=self.args.token_name) - - -class AuthSwitch(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - token_name = self.args.token_name - if token_name is None: - token_name = self._select_token_name() - - if token_name is None: - print("No token name provided. Aborting.") - exit() - auth_switch(token_name, add_to_git_credential=self.args.add_to_git_credential) - - def _select_token_name(self) -> Optional[str]: - token_names = list(get_stored_tokens().keys()) - - if not token_names: - logger.error("No stored tokens found. Please login first.") - return None +auth_cli = typer_factory(help="Manage authentication (login, logout, etc.).") + + +@auth_cli.command("login", help="Login using a token from huggingface.co/settings/tokens") +def auth_login( + token: TokenOpt = None, + add_to_git_credential: Annotated[ + bool, + typer.Option( + help="Save to git credential helper. Useful only if you plan to run git commands directly.", + ), + ] = False, +) -> None: + login(token=token, add_to_git_credential=add_to_git_credential) + + +@auth_cli.command("logout", help="Logout from a specific token") +def auth_logout( + token_name: Annotated[ + Optional[str], + typer.Option( + help="Name of token to logout", + ), + ] = None, +) -> None: + logout(token_name=token_name) + - if _inquirer_py_available: - return self._select_token_name_tui(token_names) - # if inquirer is not available, use a simpler terminal UI - print("Available stored tokens:") - for i, token_name in enumerate(token_names, 1): - print(f"{i}. {token_name}") - while True: - try: - choice = input("Enter the number of the token to switch to (or 'q' to quit): ") - if choice.lower() == "q": - return None - index = int(choice) - 1 - if 0 <= index < len(token_names): - return token_names[index] - else: - print("Invalid selection. Please try again.") - except ValueError: - print("Invalid input. 
Please enter a number or 'q' to quit.") - - def _select_token_name_tui(self, token_names: List[str]) -> Optional[str]: +def _select_token_name() -> Optional[str]: + token_names = list(get_stored_tokens().keys()) + + if not token_names: + logger.error("No stored tokens found. Please login first.") + return None + + if _inquirer_py_available: choices = [Choice(token_name, name=token_name) for token_name in token_names] try: return inquirer.select( @@ -184,30 +100,68 @@ def _select_token_name_tui(self, token_names: List[str]) -> Optional[str]: except KeyboardInterrupt: logger.info("Token selection cancelled.") return None - - -class AuthList(BaseAuthCommand): - def run(self): - logging.set_verbosity_info() - auth_list() - - -class AuthWhoami(BaseAuthCommand): - def run(self): - token = get_token() - if token is None: - print("Not logged in") - exit() + # if inquirer is not available, use a simpler terminal UI + print("Available stored tokens:") + for i, token_name in enumerate(token_names, 1): + print(f"{i}. {token_name}") + while True: try: - info = self._api.whoami(token) - print(ANSI.bold("user: "), info["name"]) - orgs = [org["name"] for org in info["orgs"]] - if orgs: - print(ANSI.bold("orgs: "), ",".join(orgs)) - - if ENDPOINT != "https://huggingface.co": - print(f"Authenticated through private endpoint: {ENDPOINT}") - except HTTPError as e: - print(e) - print(ANSI.red(e.response.text)) - exit(1) + choice = input("Enter the number of the token to switch to (or 'q' to quit): ") + if choice.lower() == "q": + return None + index = int(choice) - 1 + if 0 <= index < len(token_names): + return token_names[index] + else: + print("Invalid selection. Please try again.") + except ValueError: + print("Invalid input. Please enter a number or 'q' to quit.") + + +@auth_cli.command("switch", help="Switch between access tokens") +def auth_switch_cmd( + token_name: Annotated[ + Optional[str], + typer.Option( + help="Name of the token to switch to", + ), + ] = None, + add_to_git_credential: Annotated[ + bool, + typer.Option( + help="Save to git credential helper. Useful only if you plan to run git commands directly.", + ), + ] = False, +) -> None: + if token_name is None: + token_name = _select_token_name() + if token_name is None: + print("No token name provided. 
Aborting.") + raise typer.Exit() + auth_switch(token_name, add_to_git_credential=add_to_git_credential) + + +@auth_cli.command("list", help="List all stored access tokens") +def auth_list_cmd() -> None: + auth_list() + + +@auth_cli.command("whoami", help="Find out which huggingface.co account you are logged in as.") +def auth_whoami() -> None: + token = get_token() + if token is None: + print("Not logged in") + raise typer.Exit() + try: + info = whoami(token) + print(ANSI.bold("user: "), info["name"]) + orgs = [org["name"] for org in info["orgs"]] + if orgs: + print(ANSI.bold("orgs: "), ",".join(orgs)) + + if ENDPOINT != "https://huggingface.co": + print(f"Authenticated through private endpoint: {ENDPOINT}") + except HfHubHTTPError as e: + print(e) + print(ANSI.red(e.response.text)) + raise typer.Exit(code=1) diff --git a/src/huggingface_hub/cli/cache.py b/src/huggingface_hub/cli/cache.py index cc36ef5efd..35f7540821 100644 --- a/src/huggingface_hub/cli/cache.py +++ b/src/huggingface_hub/cli/cache.py @@ -16,14 +16,15 @@ import os import time -from argparse import Namespace, _SubParsersAction +from enum import Enum from functools import wraps from tempfile import mkstemp -from typing import Any, Callable, Iterable, List, Literal, Optional, Union +from typing import Annotated, Any, Callable, Iterable, Optional, Union -from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir -from . import BaseHuggingfaceCLICommand -from ._cli_utils import ANSI, tabulate +import typer + +from ..utils import ANSI, CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir, tabulate +from ._cli_utils import typer_factory # --- DELETE helpers (from delete_cache.py) --- @@ -36,10 +37,16 @@ except ImportError: _inquirer_py_available = False -SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"] _CANCEL_DELETION_STR = "CANCEL_DELETION" +class SortingOption(str, Enum): + alphabetical = "alphabetical" + lastUpdated = "lastUpdated" + lastUsed = "lastUsed" + size = "size" + + def require_inquirer_py(fn: Callable) -> Callable: @wraps(fn) def _inner(*args, **kwargs): @@ -54,122 +61,93 @@ def _inner(*args, **kwargs): return _inner -class CacheCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - cache_parser = parser.add_parser("cache", help="Manage local cache directory.") - cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands") +cache_cli = typer_factory(help="Manage local cache directory.") - # Show help if no subcommand is provided - cache_parser.set_defaults(func=lambda args: cache_parser.print_help()) - # Scan subcommand - scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.") - scan_parser.add_argument( - "--dir", - type=str, - default=None, - help="cache directory to scan (optional). 
Default to the default HuggingFace cache.", - ) - scan_parser.add_argument( +@cache_cli.command("scan", help="Scan the cache directory") +def cache_scan( + dir: Annotated[ + Optional[str], + typer.Option( + help="Cache directory to scan (defaults to Hugging Face cache).", + ), + ] = None, + verbose: Annotated[ + int, + typer.Option( "-v", "--verbose", - action="count", - default=0, - help="show a more verbose output", - ) - scan_parser.set_defaults(func=CacheCommand, cache_command="scan") - # Delete subcommand - delete_parser = cache_subparsers.add_parser("delete", help="Delete revisions from the cache directory.") - delete_parser.add_argument( - "--dir", - type=str, - default=None, - help="cache directory (optional). Default to the default HuggingFace cache.", - ) - delete_parser.add_argument( - "--disable-tui", - action="store_true", - help=( - "Disable Terminal User Interface (TUI) mode. Useful if your platform/terminal doesn't support the multiselect menu." - ), - ) - delete_parser.add_argument( - "--sort", - nargs="?", - choices=["alphabetical", "lastUpdated", "lastUsed", "size"], - help=( - "Sort repositories by the specified criteria. Options: " - "'alphabetical' (A-Z), " - "'lastUpdated' (newest first), " - "'lastUsed' (most recent first), " - "'size' (largest first)." - ), - ) - delete_parser.set_defaults(func=CacheCommand, cache_command="delete") - - def __init__(self, args: Namespace) -> None: - self.args = args - self.verbosity: int = getattr(args, "verbose", 0) - self.cache_dir: Optional[str] = getattr(args, "dir", None) - self.disable_tui: bool = getattr(args, "disable_tui", False) - self.sort_by: Optional[SortingOption_T] = getattr(args, "sort", None) - self.cache_command: Optional[str] = getattr(args, "cache_command", None) - - def run(self): - if self.cache_command == "scan": - self._run_scan() - elif self.cache_command == "delete": - self._run_delete() + count=True, + help="Increase verbosity (-v, -vv, -vvv).", + ), + ] = 0, +) -> None: + try: + t0 = time.time() + hf_cache_info = scan_cache_dir(dir) + t1 = time.time() + except CacheNotFound as exc: + print(f"Cache directory not found: {str(exc.cache_dir)}") + return + print(get_table(hf_cache_info, verbosity=verbose)) + print( + f"\nDone in {round(t1 - t0, 1)}s. Scanned {len(hf_cache_info.repos)} repo(s)" + f" for a total of {ANSI.red(hf_cache_info.size_on_disk_str)}." + ) + if len(hf_cache_info.warnings) > 0: + message = f"Got {len(hf_cache_info.warnings)} warning(s) while scanning." + if verbose >= 3: + print(ANSI.gray(message)) + for warning in hf_cache_info.warnings: + print(ANSI.gray(str(warning))) else: - print("Please specify a cache subcommand (scan or delete). Use -h for help.") - - def _run_scan(self): - try: - t0 = time.time() - hf_cache_info = scan_cache_dir(self.cache_dir) - t1 = time.time() - except CacheNotFound as exc: - cache_dir = exc.cache_dir - print(f"Cache directory not found: {cache_dir}") - return - print(get_table(hf_cache_info, verbosity=self.verbosity)) - print( - f"\nDone in {round(t1 - t0, 1)}s. Scanned {len(hf_cache_info.repos)} repo(s)" - f" for a total of {ANSI.red(hf_cache_info.size_on_disk_str)}." - ) - if len(hf_cache_info.warnings) > 0: - message = f"Got {len(hf_cache_info.warnings)} warning(s) while scanning." 
- if self.verbosity >= 3: - print(ANSI.gray(message)) - for warning in hf_cache_info.warnings: - print(ANSI.gray(str(warning))) - else: - print(ANSI.gray(message + " Use -vvv to print details.")) - - def _run_delete(self): - hf_cache_info = scan_cache_dir(self.cache_dir) - if self.disable_tui: - selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) + print(ANSI.gray(message + " Use -vvv to print details.")) + + +@cache_cli.command("delete", help="Delete revisions from the cache directory") +def cache_delete( + dir: Annotated[ + Optional[str], + typer.Option( + help="Cache directory (defaults to Hugging Face cache).", + ), + ] = None, + disable_tui: Annotated[ + bool, + typer.Option( + help="Disable Terminal User Interface (TUI) mode. Useful if your platform/terminal doesn't support the multiselect menu.", + ), + ] = False, + sort: Annotated[ + Optional[SortingOption], + typer.Option( + help="Sort repositories by the specified criteria. Options: 'alphabetical' (A-Z), 'lastUpdated' (newest first), 'lastUsed' (most recent first), 'size' (largest first).", + ), + ] = None, +) -> None: + hf_cache_info = scan_cache_dir(dir) + sort_by = sort.value if sort is not None else None + if disable_tui: + selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=sort_by) + else: + selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=sort_by) + if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes: + confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion ?" + if disable_tui: + confirmed = _ask_for_confirmation_no_tui(confirm_message) else: - selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by) - if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes: - confirm_message = _get_expectations_str(hf_cache_info, selected_hashes) + " Confirm deletion ?" - if self.disable_tui: - confirmed = _ask_for_confirmation_no_tui(confirm_message) - else: - confirmed = _ask_for_confirmation_tui(confirm_message) - if confirmed: - strategy = hf_cache_info.delete_revisions(*selected_hashes) - print("Start deletion.") - strategy.execute() - print( - f"Done. Deleted {len(strategy.repos)} repo(s) and" - f" {len(strategy.snapshots)} revision(s) for a total of" - f" {strategy.expected_freed_size_str}." - ) - return - print("Deletion is cancelled. Do nothing.") + confirmed = _ask_for_confirmation_tui(confirm_message) + if confirmed: + strategy = hf_cache_info.delete_revisions(*selected_hashes) + print("Start deletion.") + strategy.execute() + print( + f"Done. Deleted {len(strategy.repos)} repo(s) and" + f" {len(strategy.snapshots)} revision(s) for a total of" + f" {strategy.expected_freed_size_str}." + ) + return + print("Deletion is cancelled. 
Do nothing.") def get_table(hf_cache_info: HFCacheInfo, *, verbosity: int = 0) -> str: @@ -228,7 +206,7 @@ def get_table(hf_cache_info: HFCacheInfo, *, verbosity: int = 0) -> str: ) -def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T] = None): +def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[str] = None): if sort_by == "alphabetical": return (repo.repo_type, repo.repo_id.lower()) elif sort_by == "lastUpdated": @@ -242,9 +220,7 @@ def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_ @require_inquirer_py -def _manual_review_tui( - hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List[str]: +def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[str] = None) -> list[str]: choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected, sort_by=sort_by) checkbox = inquirer.checkbox( message="Select revisions to delete:", @@ -277,9 +253,9 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool: def _get_tui_choices_from_scan( - repos: Iterable[CachedRepoInfo], preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List: - choices: List[Union["Choice", "Separator"]] = [] + repos: Iterable[CachedRepoInfo], preselected: list[str], sort_by: Optional[str] = None +) -> list: + choices: list[Union["Choice", "Separator"]] = [] choices.append( Choice( _CANCEL_DELETION_STR, name="None of the following (if selected, nothing will be deleted).", enabled=False @@ -306,8 +282,8 @@ def _get_tui_choices_from_scan( def _manual_review_no_tui( - hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None -) -> List[str]: + hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[str] = None +) -> list[str]: fd, tmp_path = mkstemp(suffix=".txt") os.close(fd) lines = [] @@ -358,14 +334,14 @@ def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool: print(f"Invalid input. Must be one of {ALL}") -def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str: +def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: list[str]) -> str: if _CANCEL_DELETION_STR in selected_hashes: return "Nothing will be deleted." strategy = hf_cache_info.delete_revisions(*selected_hashes) return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}." 
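# --- Editorial aside (not part of the patch) ---------------------------------
# The `hf cache delete` flow refactored above is built on public APIs and can
# also be driven programmatically. A minimal sketch; `execute()` is left
# commented out because deletion is irreversible:
#
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()  # defaults to the Hugging Face cache directory
# Take a single revision hash as an example selection:
selected = [rev.commit_hash for repo in cache_info.repos for rev in repo.revisions][:1]
strategy = cache_info.delete_revisions(*selected)  # plan only, nothing deleted yet
print(f"Would free {strategy.expected_freed_size_str}")
# strategy.execute()  # uncomment to actually delete the selected revisions
# ------------------------------------------------------------------------------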
-def _read_manual_review_tmp_file(tmp_path: str) -> List[str]: +def _read_manual_review_tmp_file(tmp_path: str) -> list[str]: with open(tmp_path) as f: content = f.read() lines = [line.strip() for line in content.split("\n")] diff --git a/src/huggingface_hub/cli/download.py b/src/huggingface_hub/cli/download.py index 3e59233da1..0a80f0caed 100644 --- a/src/huggingface_hub/cli/download.py +++ b/src/huggingface_hub/cli/download.py @@ -37,145 +37,150 @@ """ import warnings -from argparse import Namespace, _SubParsersAction -from typing import List, Optional +from typing import Annotated, Optional, Union + +import typer from huggingface_hub import logging from huggingface_hub._snapshot_download import snapshot_download -from huggingface_hub.commands import BaseHuggingfaceCLICommand -from huggingface_hub.file_download import hf_hub_download -from huggingface_hub.utils import disable_progress_bars, enable_progress_bars - - -logger = logging.get_logger(__name__) - - -class DownloadCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - download_parser = parser.add_parser("download", help="Download files from the Hub") - download_parser.add_argument( - "repo_id", type=str, help="ID of the repo to download from (e.g. `username/repo-name`)." - ) - download_parser.add_argument( - "filenames", type=str, nargs="*", help="Files to download (e.g. `config.json`, `data/metadata.jsonl`)." - ) - download_parser.add_argument( - "--repo-type", - choices=["model", "dataset", "space"], - default="model", - help="Type of repo to download from (defaults to 'model').", - ) - download_parser.add_argument( - "--revision", - type=str, - help="An optional Git revision id which can be a branch name, a tag, or a commit hash.", - ) - download_parser.add_argument( - "--include", nargs="*", type=str, help="Glob patterns to match files to download." - ) - download_parser.add_argument( - "--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to download." - ) - download_parser.add_argument( - "--cache-dir", type=str, help="Path to the directory where to save the downloaded files." - ) - download_parser.add_argument( - "--local-dir", - type=str, - help=( - "If set, the downloaded file will be placed under this directory. Check out" - " https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more" - " details." - ), - ) - download_parser.add_argument( - "--force-download", - action="store_true", +from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download +from huggingface_hub.utils import _format_size, disable_progress_bars, enable_progress_bars, tabulate + +from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt + + +def download( + repo_id: RepoIdArg, + filenames: Annotated[ + Optional[list[str]], + typer.Argument( + help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).", + ), + ] = None, + repo_type: RepoTypeOpt = RepoTypeOpt.model, + revision: RevisionOpt = None, + include: Annotated[ + Optional[list[str]], + typer.Option( + help="Glob patterns to include from files to download. 
e.g. *.json",
+        ),
+    ] = None,
+    exclude: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to exclude from files to download.",
+        ),
+    ] = None,
+    cache_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Directory where to save files.",
+        ),
+    ] = None,
+    local_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more details.",
+        ),
+    ] = None,
+    force_download: Annotated[
+        bool,
+        typer.Option(
             help="If True, the files will be downloaded even if they are already cached.",
-        )
-        download_parser.add_argument(
-            "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
-        )
-        download_parser.add_argument(
-            "--quiet",
-            action="store_true",
+        ),
+    ] = False,
+    dry_run: Annotated[
+        bool,
+        typer.Option(
+            help="If True, perform a dry run without actually downloading the files.",
+        ),
+    ] = False,
+    token: TokenOpt = None,
+    quiet: Annotated[
+        bool,
+        typer.Option(
             help="If True, progress bars are disabled and only the path to the download files is printed.",
-        )
-        download_parser.add_argument(
-            "--max-workers",
-            type=int,
-            default=8,
+        ),
+    ] = False,
+    max_workers: Annotated[
+        int,
+        typer.Option(
             help="Maximum number of workers to use for downloading files. Default is 8.",
-        )
-        download_parser.set_defaults(func=DownloadCommand)
-
-    def __init__(self, args: Namespace) -> None:
-        self.token = args.token
-        self.repo_id: str = args.repo_id
-        self.filenames: List[str] = args.filenames
-        self.repo_type: str = args.repo_type
-        self.revision: Optional[str] = args.revision
-        self.include: Optional[List[str]] = args.include
-        self.exclude: Optional[List[str]] = args.exclude
-        self.cache_dir: Optional[str] = args.cache_dir
-        self.local_dir: Optional[str] = args.local_dir
-        self.force_download: bool = args.force_download
-        self.quiet: bool = args.quiet
-        self.max_workers: int = args.max_workers
-
-    def run(self) -> None:
-        if self.quiet:
-            disable_progress_bars()
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                print(self._download())  # Print path to downloaded files
-            enable_progress_bars()
-        else:
-            logging.set_verbosity_info()
-            print(self._download())  # Print path to downloaded files
-            logging.set_verbosity_warning()
+        ),
+    ] = 8,
+) -> None:
+    """Download files from the Hub."""

-    def _download(self) -> str:
+    def run_download() -> Union[str, DryRunFileInfo, list[DryRunFileInfo]]:
+        filenames_list = filenames if filenames is not None else []
         # Warn user if patterns are ignored
-        if len(self.filenames) > 0:
-            if self.include is not None and len(self.include) > 0:
+        if len(filenames_list) > 0:
+            if include is not None and len(include) > 0:
                 warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
-            if self.exclude is not None and len(self.exclude) > 0:
+            if exclude is not None and len(exclude) > 0:
                 warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")

         # Single file to download: use `hf_hub_download`
-        if len(self.filenames) == 1:
+        if len(filenames_list) == 1:
             return hf_hub_download(
-                repo_id=self.repo_id,
-                repo_type=self.repo_type,
-                revision=self.revision,
-                filename=self.filenames[0],
-                cache_dir=self.cache_dir,
-                force_download=self.force_download,
-                token=self.token,
-                local_dir=self.local_dir,
+                repo_id=repo_id,
+                repo_type=repo_type.value,
+                revision=revision,
+
filename=filenames_list[0], + cache_dir=cache_dir, + force_download=force_download, + token=token, + local_dir=local_dir, library_name="hf", + dry_run=dry_run, ) # Otherwise: use `snapshot_download` to ensure all files comes from same revision - elif len(self.filenames) == 0: - allow_patterns = self.include - ignore_patterns = self.exclude + if len(filenames_list) == 0: + allow_patterns = include + ignore_patterns = exclude else: - allow_patterns = self.filenames + allow_patterns = filenames_list ignore_patterns = None return snapshot_download( - repo_id=self.repo_id, - repo_type=self.repo_type, - revision=self.revision, + repo_id=repo_id, + repo_type=repo_type.value, + revision=revision, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, - force_download=self.force_download, - cache_dir=self.cache_dir, - token=self.token, - local_dir=self.local_dir, + force_download=force_download, + cache_dir=cache_dir, + token=token, + local_dir=local_dir, library_name="hf", - max_workers=self.max_workers, + max_workers=max_workers, + dry_run=dry_run, + ) + + def _print_result(result: Union[str, DryRunFileInfo, list[DryRunFileInfo]]) -> None: + if isinstance(result, str): + print(result) + return + + # Print dry run info + if isinstance(result, DryRunFileInfo): + result = [result] + print( + f"[dry-run] Will download {len([r for r in result if r.will_download])} files (out of {len(result)}) totalling {_format_size(sum(r.file_size for r in result if r.will_download))}." ) + columns = ["File", "Bytes to download"] + items: list[list[Union[str, int]]] = [] + for info in sorted(result, key=lambda x: x.filename): + items.append([info.filename, _format_size(info.file_size) if info.will_download else "-"]) + print(tabulate(items, headers=columns)) + + if quiet: + disable_progress_bars() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + _print_result(run_download()) + enable_progress_bars() + else: + _print_result(run_download()) + logging.set_verbosity_warning() diff --git a/src/huggingface_hub/cli/hf.py b/src/huggingface_hub/cli/hf.py index 2587918b29..ce9205a6b8 100644 --- a/src/huggingface_hub/cli/hf.py +++ b/src/huggingface_hub/cli/hf.py @@ -12,51 +12,51 @@ # See the License for the specific language governing permissions and # limitations under the License. 
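# --- Editorial aside (not part of the patch) ---------------------------------
# What `_print_result` above renders for `hf download <repo_id> --dry-run`,
# reconstructed from the hunk's format strings and the `tabulate` helper; the
# file names and sizes below are illustrative, not captured output:
#
#     [dry-run] Will download 2 files (out of 3) totalling 550.2M.
#     File              Bytes to download
#     ----------------- -----------------
#     config.json       -
#     model.safetensors 548.1M
#     tokenizer.json    2.1M
# ------------------------------------------------------------------------------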
-from argparse import ArgumentParser -from huggingface_hub.cli.auth import AuthCommands -from huggingface_hub.cli.cache import CacheCommand -from huggingface_hub.cli.download import DownloadCommand -from huggingface_hub.cli.jobs import JobsCommands -from huggingface_hub.cli.lfs import LfsCommands -from huggingface_hub.cli.repo import RepoCommands -from huggingface_hub.cli.repo_files import RepoFilesCommand -from huggingface_hub.cli.system import EnvironmentCommand, VersionCommand -from huggingface_hub.cli.upload import UploadCommand -from huggingface_hub.cli.upload_large_folder import UploadLargeFolderCommand +from huggingface_hub.cli._cli_utils import check_cli_update, typer_factory +from huggingface_hub.cli.auth import auth_cli +from huggingface_hub.cli.cache import cache_cli +from huggingface_hub.cli.download import download +from huggingface_hub.cli.inference_endpoints import app as inference_endpoints_cli +from huggingface_hub.cli.jobs import jobs_cli +from huggingface_hub.cli.lfs import lfs_enable_largefiles, lfs_multipart_upload +from huggingface_hub.cli.repo import repo_cli +from huggingface_hub.cli.repo_files import repo_files_cli +from huggingface_hub.cli.system import env, version + +# from huggingface_hub.cli.jobs import jobs_app +from huggingface_hub.cli.upload import upload +from huggingface_hub.cli.upload_large_folder import upload_large_folder +from huggingface_hub.utils import logging + + +app = typer_factory(help="Hugging Face Hub CLI") + + +# top level single commands (defined in their respective files) +app.command(help="Download files from the Hub.")(download) +app.command(help="Upload a file or a folder to the Hub.")(upload) +app.command(help="Upload a large folder to the Hub. Recommended for resumable uploads.")(upload_large_folder) +app.command(name="env", help="Print information about the environment.")(env) +app.command(help="Print information about the hf version.")(version) +app.command(help="Configure your repository to enable upload of files > 5GB.", hidden=True)(lfs_enable_largefiles) +app.command(help="Upload large files to the Hub.", hidden=True)(lfs_multipart_upload) + + +# command groups +app.add_typer(auth_cli, name="auth") +app.add_typer(cache_cli, name="cache") +app.add_typer(repo_cli, name="repo") +app.add_typer(repo_files_cli, name="repo-files") +app.add_typer(jobs_cli, name="jobs") +app.add_typer(inference_endpoints_cli, name="endpoints") +app.add_typer(inference_endpoints_cli, name="inference-endpoints", hidden=True) def main(): - parser = ArgumentParser("hf", usage="hf []") - commands_parser = parser.add_subparsers(help="hf command helpers") - - # Register commands - AuthCommands.register_subcommand(commands_parser) - CacheCommand.register_subcommand(commands_parser) - DownloadCommand.register_subcommand(commands_parser) - JobsCommands.register_subcommand(commands_parser) - RepoCommands.register_subcommand(commands_parser) - RepoFilesCommand.register_subcommand(commands_parser) - UploadCommand.register_subcommand(commands_parser) - UploadLargeFolderCommand.register_subcommand(commands_parser) - - # System commands - EnvironmentCommand.register_subcommand(commands_parser) - VersionCommand.register_subcommand(commands_parser) - - # LFS commands (hidden in --help) - LfsCommands.register_subcommand(commands_parser) - - # Let's go - args = parser.parse_args() - if not hasattr(args, "func"): - parser.print_help() - exit(1) - - # Run - service = args.func(args) - if service is not None: - service.run() + logging.set_verbosity_info() + check_cli_update() 
+ app() if __name__ == "__main__": diff --git a/src/huggingface_hub/cli/inference_endpoints.py b/src/huggingface_hub/cli/inference_endpoints.py new file mode 100644 index 0000000000..f0d2f17b7a --- /dev/null +++ b/src/huggingface_hub/cli/inference_endpoints.py @@ -0,0 +1,375 @@ +"""CLI commands for Hugging Face Inference Endpoints.""" + +import json +from typing import Annotated, Optional + +import typer + +from huggingface_hub._inference_endpoints import InferenceEndpoint +from huggingface_hub.errors import HfHubHTTPError + +from ._cli_utils import TokenOpt, get_hf_api, typer_factory + + +app = typer_factory(help="Manage Hugging Face Inference Endpoints.") + +catalog_app = typer_factory(help="Interact with the Inference Endpoints catalog.") + +NameArg = Annotated[ + str, + typer.Argument(help="Endpoint name."), +] + +NamespaceOpt = Annotated[ + Optional[str], + typer.Option( + help="The namespace associated with the Inference Endpoint. Defaults to the current user's namespace.", + ), +] + + +def _print_endpoint(endpoint: InferenceEndpoint) -> None: + typer.echo(json.dumps(endpoint.raw, indent=2, sort_keys=True)) + + +@app.command() +def ls( + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Lists all Inference Endpoints for the given namespace.""" + api = get_hf_api(token=token) + try: + endpoints = api.list_inference_endpoints(namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Listing failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo( + json.dumps( + {"items": [endpoint.raw for endpoint in endpoints]}, + indent=2, + sort_keys=True, + ) + ) + + +@app.command(name="deploy", help="Deploy an Inference Endpoint from a Hub repository.") +def deploy( + name: NameArg, + repo: Annotated[ + str, + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ], + framework: Annotated[ + str, + typer.Option( + help="The machine learning framework used for the model (e.g. 'vllm').", + ), + ], + accelerator: Annotated[ + str, + typer.Option( + help="The hardware accelerator to be used for inference (e.g. 'cpu').", + ), + ], + instance_size: Annotated[ + str, + typer.Option( + help="The size or type of the instance to be used for hosting the model (e.g. 'x4').", + ), + ], + instance_type: Annotated[ + str, + typer.Option( + help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').", + ), + ], + region: Annotated[ + str, + typer.Option( + help="The cloud region in which the Inference Endpoint will be created (e.g. 'us-east-1').", + ), + ], + vendor: Annotated[ + str, + typer.Option( + help="The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. 'aws').", + ), + ], + *, + namespace: NamespaceOpt = None, + task: Annotated[ + Optional[str], + typer.Option( + help="The task on which to deploy the model (e.g. 
'text-classification').", + ), + ] = None, + token: TokenOpt = None, +) -> None: + api = get_hf_api(token=token) + try: + endpoint = api.create_inference_endpoint( + name=name, + repository=repo, + framework=framework, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + region=region, + vendor=vendor, + namespace=namespace, + task=task, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Deployment failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@catalog_app.command(name="deploy", help="Deploy an Inference Endpoint from the Model Catalog.") +def deploy_from_catalog( + name: NameArg, + repo: Annotated[ + str, + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ], + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + api = get_hf_api(token=token) + try: + endpoint = api.create_inference_endpoint_from_catalog( + repo_id=repo, + name=name, + namespace=namespace, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Deployment failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +def list_catalog( + token: TokenOpt = None, +) -> None: + """List available Catalog models.""" + api = get_hf_api(token=token) + try: + models = api.list_inference_catalog(token=token) + except HfHubHTTPError as error: + typer.echo(f"Catalog fetch failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo(json.dumps({"models": models}, indent=2, sort_keys=True)) + + +catalog_app.command(name="ls")(list_catalog) +app.command(name="list-catalog", help="List available Catalog models.", hidden=True)(list_catalog) + + +app.add_typer(catalog_app, name="catalog") + + +@app.command() +def describe( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Get information about an existing endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.get_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Fetch failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@app.command() +def update( + name: NameArg, + namespace: NamespaceOpt = None, + repo: Annotated[ + Optional[str], + typer.Option( + help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').", + ), + ] = None, + accelerator: Annotated[ + Optional[str], + typer.Option( + help="The hardware accelerator to be used for inference (e.g. 'cpu').", + ), + ] = None, + instance_size: Annotated[ + Optional[str], + typer.Option( + help="The size or type of the instance to be used for hosting the model (e.g. 'x4').", + ), + ] = None, + instance_type: Annotated[ + Optional[str], + typer.Option( + help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').", + ), + ] = None, + framework: Annotated[ + Optional[str], + typer.Option( + help="The machine learning framework used for the model (e.g. 'custom').", + ), + ] = None, + revision: Annotated[ + Optional[str], + typer.Option( + help="The specific model revision to deploy on the Inference Endpoint (e.g. 
'6c0e6080953db56375760c0471a8c5f2929baf11').", + ), + ] = None, + task: Annotated[ + Optional[str], + typer.Option( + help="The task on which to deploy the model (e.g. 'text-classification').", + ), + ] = None, + min_replica: Annotated[ + Optional[int], + typer.Option( + help="The minimum number of replicas (instances) to keep running for the Inference Endpoint.", + ), + ] = None, + max_replica: Annotated[ + Optional[int], + typer.Option( + help="The maximum number of replicas (instances) to scale to for the Inference Endpoint.", + ), + ] = None, + scale_to_zero_timeout: Annotated[ + Optional[int], + typer.Option( + help="The duration in minutes before an inactive endpoint is scaled to zero.", + ), + ] = None, + token: TokenOpt = None, +) -> None: + """Update an existing endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.update_inference_endpoint( + name=name, + namespace=namespace, + repository=repo, + framework=framework, + revision=revision, + task=task, + accelerator=accelerator, + instance_size=instance_size, + instance_type=instance_type, + min_replica=min_replica, + max_replica=max_replica, + scale_to_zero_timeout=scale_to_zero_timeout, + token=token, + ) + except HfHubHTTPError as error: + typer.echo(f"Update failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + _print_endpoint(endpoint) + + +@app.command() +def delete( + name: NameArg, + namespace: NamespaceOpt = None, + yes: Annotated[ + bool, + typer.Option("--yes", help="Skip confirmation prompts."), + ] = False, + token: TokenOpt = None, +) -> None: + """Delete an Inference Endpoint permanently.""" + if not yes: + confirmation = typer.prompt(f"Delete endpoint '{name}'? Type the name to confirm.") + if confirmation != name: + typer.echo("Aborted.") + raise typer.Exit(code=2) + + api = get_hf_api(token=token) + try: + api.delete_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Delete failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + typer.echo(f"Deleted '{name}'.") + + +@app.command() +def pause( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Pause an Inference Endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.pause_inference_endpoint(name=name, namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Pause failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) + + +@app.command() +def resume( + name: NameArg, + namespace: NamespaceOpt = None, + fail_if_already_running: Annotated[ + bool, + typer.Option( + "--fail-if-already-running", + help="If `True`, the method will raise an error if the Inference Endpoint is already running.", + ), + ] = False, + token: TokenOpt = None, +) -> None: + """Resume an Inference Endpoint.""" + api = get_hf_api(token=token) + try: + endpoint = api.resume_inference_endpoint( + name=name, + namespace=namespace, + token=token, + running_ok=not fail_if_already_running, + ) + except HfHubHTTPError as error: + typer.echo(f"Resume failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + _print_endpoint(endpoint) + + +@app.command() +def scale_to_zero( + name: NameArg, + namespace: NamespaceOpt = None, + token: TokenOpt = None, +) -> None: + """Scale an Inference Endpoint to zero.""" + api = get_hf_api(token=token) + try: + endpoint = api.scale_to_zero_inference_endpoint(name=name, 
namespace=namespace, token=token) + except HfHubHTTPError as error: + typer.echo(f"Scale To Zero failed: {error}") + raise typer.Exit(code=error.response.status_code) from error + + _print_endpoint(endpoint) diff --git a/src/huggingface_hub/cli/jobs.py b/src/huggingface_hub/cli/jobs.py index 3a661c7df7..07363a88a6 100644 --- a/src/huggingface_hub/cli/jobs.py +++ b/src/huggingface_hub/cli/jobs.py @@ -28,1073 +28,742 @@ # Cancel a running job hf jobs cancel + + # Run a UV script + hf jobs uv run