From 36171697bf7e850869062eeb751d50ca5bf090c9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 1 Jan 2025 20:31:49 -0600 Subject: [PATCH 1/2] use ruff rules for isort, add codespell --- .github/workflows/main.yml | 4 ++-- .pre-commit-config.yaml | 16 ++++++++-------- README.md | 4 ++-- notebooks/testing/ranker-local.ipynb | 7 ++++--- notebooks/testing/sparse-inputs.ipynb | 7 ++++--- pyproject.toml | 2 ++ 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2ad0a60..f696b5d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,6 +1,6 @@ name: Continuous Integration -# alwas run CI on new commits to any branch +# always run CI on new commits to any branch on: push jobs: @@ -20,7 +20,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: - # 'driver: docker' allows one build to re-use images from a prior build + # 'driver: docker' allows one build to reuse images from a prior build # ref: https://github.com/docker/setup-buildx-action/issues/251 driver: docker install: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d264ed..0670fca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,14 +11,8 @@ repos: - id: check-toml - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort (python) - args: ["--settings-path", "pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.14.0 + rev: v1.14.1 hooks: - id: mypy args: ["--config-file", "pyproject.toml"] @@ -31,7 +25,7 @@ repos: hooks: # Run the linter. - id: ruff - args: ["--config", "pyproject.toml"] + args: ["--config", "pyproject.toml", "--fix"] types_or: [jupyter, python] # Run the formatter. 
- id: ruff-format @@ -51,3 +45,9 @@ repos: rev: v1.35.1 hooks: - id: yamllint + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] diff --git a/README.md b/README.md index 00358fe..37c5e0d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Testing `lightgbm.dask` -[![GitHub Actions status](https://github.com/jameslamb/lightgbm-dask-testing/workflows/Continuous%20Integration/badge.svg?branch=main)](https://github.com/jameslamb/lightgbm-dask-testing/actions) +[![GitHub Actions](https://github.com/jameslamb/lightgbm-dask-testing/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/jameslamb/lightgbm-dask-testing/actions/workflows/main.yml) This repository can be used to test and develop changes to LightGBM's Dask integration. It contains the following useful features: @@ -110,7 +110,7 @@ pip install --upgrade awscli Next, configure your shell to make authenticated requests to AWS. If you've never done this, you can see [the AWS CLI docs](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html). -The rest of this section assums that the shell variables `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` have been sett. +The rest of this section assumes that the shell variables `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` have been set. 
I like to set these by keeping them in a file diff --git a/notebooks/testing/ranker-local.ipynb b/notebooks/testing/ranker-local.ipynb index ea0f8db..cd69107 100644 --- a/notebooks/testing/ranker-local.ipynb +++ b/notebooks/testing/ranker-local.ipynb @@ -22,10 +22,11 @@ "import numpy as np\n", "import pandas as pd\n", "from dask.distributed import Client, LocalCluster\n", - "from lightgbm.dask import DaskLGBMRanker\n", - "from lightgbm.sklearn import LGBMRanker\n", "from scipy.stats import spearmanr\n", - "from sklearn.utils import check_random_state" + "from sklearn.utils import check_random_state\n", + "\n", + "from lightgbm.dask import DaskLGBMRanker\n", + "from lightgbm.sklearn import LGBMRanker" ] }, { diff --git a/notebooks/testing/sparse-inputs.ipynb b/notebooks/testing/sparse-inputs.ipynb index d85ebae..2ada8a0 100644 --- a/notebooks/testing/sparse-inputs.ipynb +++ b/notebooks/testing/sparse-inputs.ipynb @@ -16,10 +16,11 @@ "import dask.array as da\n", "import numpy as np\n", "from dask.distributed import Client, LocalCluster\n", - "from lightgbm.dask import DaskLGBMClassifier\n", - "from lightgbm.sklearn import LGBMClassifier\n", "from scipy.sparse import csc_matrix\n", - "from sklearn.datasets import make_blobs" + "from sklearn.datasets import make_blobs\n", + "\n", + "from lightgbm.dask import DaskLGBMClassifier\n", + "from lightgbm.sklearn import LGBMClassifier" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 1ccc244..ce106f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,8 @@ select = [ "E", # pyflakes "F", + # isort + "I", # NumPy-specific rules "NPY", # pylint From 14c2756a09eaa4b7e941df88502962f928391c8c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 1 Jan 2025 20:45:45 -0600 Subject: [PATCH 2/2] use known-third-party --- notebooks/testing/ranker-local.ipynb | 7 +++---- notebooks/testing/sparse-inputs.ipynb | 7 +++---- pyproject.toml | 13 +++++++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git 
a/notebooks/testing/ranker-local.ipynb b/notebooks/testing/ranker-local.ipynb index cd69107..ea0f8db 100644 --- a/notebooks/testing/ranker-local.ipynb +++ b/notebooks/testing/ranker-local.ipynb @@ -22,11 +22,10 @@ "import numpy as np\n", "import pandas as pd\n", "from dask.distributed import Client, LocalCluster\n", - "from scipy.stats import spearmanr\n", - "from sklearn.utils import check_random_state\n", - "\n", "from lightgbm.dask import DaskLGBMRanker\n", - "from lightgbm.sklearn import LGBMRanker" + "from lightgbm.sklearn import LGBMRanker\n", + "from scipy.stats import spearmanr\n", + "from sklearn.utils import check_random_state" ] }, { diff --git a/notebooks/testing/sparse-inputs.ipynb b/notebooks/testing/sparse-inputs.ipynb index 2ada8a0..d85ebae 100644 --- a/notebooks/testing/sparse-inputs.ipynb +++ b/notebooks/testing/sparse-inputs.ipynb @@ -16,11 +16,10 @@ "import dask.array as da\n", "import numpy as np\n", "from dask.distributed import Client, LocalCluster\n", - "from scipy.sparse import csc_matrix\n", - "from sklearn.datasets import make_blobs\n", - "\n", "from lightgbm.dask import DaskLGBMClassifier\n", - "from lightgbm.sklearn import LGBMClassifier" + "from lightgbm.sklearn import LGBMClassifier\n", + "from scipy.sparse import csc_matrix\n", + "from sklearn.datasets import make_blobs" ] }, { diff --git a/pyproject.toml b/pyproject.toml index ce106f5..14d4f52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,19 @@ select = [ "SIM401", ] +[tool.ruff.lint.isort] + +# prevent ruff from thinking that 'lightgbm.dask' imports should +# come after all others +known-third-party = [ + "dask", + "dask_cloudprovider", + "lightgbm", + "pandas", + "scipy", + "sklearn", +] + [tool.ruff.lint.per-file-ignores] "*.ipynb" = [ # (pylint) Unnecessary list() call