From a9217b5ff0c5be3fbc4760d7a97d3e74dcd0d413 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 14:47:00 +0000 Subject: [PATCH 01/41] Update CI to Python 3.12 Signed-off-by: Arthit Suriyawongkul --- .github/ISSUE_TEMPLATE/feature_request.md | 10 ++-- .github/pull_request_template.md | 1 + .github/workflows/deploy_docs.yml | 11 ++-- .github/workflows/lint.yml | 4 +- .github/workflows/pypi-publish.yml | 4 +- .github/workflows/pypi-test.yml | 4 +- .../{macos-test.yml => test-macos.yml} | 26 ++++------ .../workflows/{test.yml => test-ubuntu.yml} | 13 +++-- .../{windows-test.yml => test-windows.yml} | 12 ++--- .gitignore | 2 + Dockerfile | 2 +- docker_requirements.txt | 52 +++++++++---------- requirements.txt | 6 +-- 13 files changed, 72 insertions(+), 75 deletions(-) rename .github/workflows/{macos-test.yml => test-macos.yml} (79%) rename .github/workflows/{test.yml => test-ubuntu.yml} (79%) rename .github/workflows/{windows-test.yml => test-windows.yml} (81%) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 0132862f9..de7414c14 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -4,6 +4,7 @@ about: Propose a change or an addition เสนอความสามาร --- ## Detailed description + ## Context @@ -14,7 +15,8 @@ about: Propose a change or an addition เสนอความสามาร ## Your environment -* PyThaiNLP version: -* Python version: -* Operating system and version (distro, 32/64-bit): -* More info (Docker, VM, etc.): + +- PyThaiNLP version: +- Python version: +- Operating system and version (distro, 32/64-bit): +- More info (Docker, VM, etc.): diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 61435ef5e..63f149aac 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -13,6 +13,7 @@ Description of how the changes fix the issue. Fixes #... 
### Your checklist for this pull request + 🚨Please review the [guidelines for contributing](https://github.com/PyThaiNLP/pythainlp/blob/dev/CONTRIBUTING.md) to this repository. - [ ] Passed code styles and structures diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 72e1eb6ef..87d9d947e 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -10,13 +10,14 @@ on: jobs: release: name: Build - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5.3.0 with: - python-version: '3.8' + python-version: '3.12' - name: Install dependencies env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 296500a92..770ae906a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,10 +18,10 @@ on: jobs: ruff: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 - - uses: chartboost/ruff-action@v1 + - uses: astral-sh/ruff-action@v1 with: src: "./pythainlp" args: check --verbose --line-length 79 --select C901 diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 387d8ad63..d1fb8fb81 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -7,10 +7,10 @@ on: jobs: deploy: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.8] + python-version: [3.12] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 23181bf5f..c6691a8db 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -7,10 +7,10 @@ on: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.8] 
+ python-version: [3.12] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/macos-test.yml b/.github/workflows/test-macos.yml similarity index 79% rename from .github/workflows/macos-test.yml rename to .github/workflows/test-macos.yml index 64b17524e..00b1367d4 100644 --- a/.github/workflows/macos-test.yml +++ b/.github/workflows/test-macos.yml @@ -1,4 +1,4 @@ -name: macOS Unit test and code coverage +name: Unit test and code coverage (macOS) on: push: @@ -22,11 +22,13 @@ jobs: fail-fast: false matrix: os: [macos-latest, self-hosted] - python-version: [3.8] + python-version: [3.12] steps: - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ matrix.python-version }} auto-activate-base: false @@ -45,20 +47,16 @@ jobs: shell: bash -l {0} run: | source ~/miniconda3/etc/profile.d/conda.sh - conda create -y -n pythainlpwork38 python=3.8 - conda activate pythainlpwork38 + conda create -y -n pythainlpwork312 python=3.12 + conda activate pythainlpwork312 conda info conda list python -m pip install --upgrade pip SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt conda install -y -c conda-forge protobuf - pip install pytest coverage coveralls typing_extensions==4.5.0 - pip install ssg epitran - pip install fastai==1.0.61 - pip install fairseq==0.10.2 + pip install pytest coverage coveralls conda install -y -c conda-forge icu conda install -y -c conda-forge pyicu - pip install deepcut tltk pip install .[full] python -m nltk.downloader omw-1.4 python -m pip cache purge @@ -69,11 +67,6 @@ jobs: conda info conda list if: matrix.os == 'self-hosted' - - name: Install PyTorch - shell: bash -l {0} - run: | - pip install torch==1.10.0 - if: matrix.os != 'self-hosted' - name: Install dependencies shell: bash -l {0} run: | @@ -82,7 +75,6 @@ 
jobs: conda install -c conda-forge icu conda install -c conda-forge pyicu SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt - pip install deepcut tltk pip install .[full] python -m nltk.downloader omw-1.4 python -m pip cache purge diff --git a/.github/workflows/test.yml b/.github/workflows/test-ubuntu.yml similarity index 79% rename from .github/workflows/test.yml rename to .github/workflows/test-ubuntu.yml index 352495b29..ca35a21ea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test-ubuntu.yml @@ -1,4 +1,4 @@ -name: Unit test and code coverage +name: Unit test and code coverage (Ubuntu) on: push: @@ -15,15 +15,16 @@ on: jobs: build: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.9] + python-version: [3.12] steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -32,10 +33,8 @@ jobs: python -m pip install backports.zoneinfo[tzdata] pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt - pip install deepcut tltk pip install .[full] python -m nltk.downloader omw-1.4 - python -m pip install spacy deepcut tltk python -m pip cache purge - name: Test env: diff --git a/.github/workflows/windows-test.yml b/.github/workflows/test-windows.yml similarity index 81% rename from .github/workflows/windows-test.yml rename to .github/workflows/test-windows.yml index 4883aa22c..9ea69aa76 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/test-windows.yml @@ -1,4 +1,4 @@ -name: Windows Unit test and code coverage +name: Unit test and code coverage (Windows) on: push: @@ -22,11 +22,13 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: [3.8] + 
python-version: [3.12] steps: - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ matrix.python-version }} auto-activate-base: true @@ -49,11 +51,9 @@ jobs: python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq - python -m pip install https://www.dropbox.com/s/o6p2sj5z50iim1e/PyICU-2.3.1-cp38-cp38-win_amd64.whl?dl=1 python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader omw-1.4 - python -m pip install spacy deepcut tltk - name: Test shell: powershell env: diff --git a/.gitignore b/.gitignore index af16c6179..a7f5543b2 100644 --- a/.gitignore +++ b/.gitignore @@ -119,3 +119,5 @@ notebooks/iso_11940-dev.ipynb # vscode devcontainer .devcontainer/ notebooks/d.model + +logs/ diff --git a/Dockerfile b/Dockerfile index dc8162af7..3a8b325eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 -FROM python:3.8-slim-buster +FROM python:3.12-slim COPY . . 
diff --git a/docker_requirements.txt b/docker_requirements.txt index fc6b7947b..017bf490a 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -1,41 +1,41 @@ PyYAML==5.4.1 attacut==1.0.6 -bpemb==0.3.4 +bpemb==0.3.6 deepcut==0.7.0.0 emoji==0.5.4 -epitran==1.9 -esupar==1.3.9 -fairseq==0.10.2 +epitran==1.25.1 +esupar==1.7.5 +fairseq==0.12.2 fastai==1.0.61 fastcoref==2.1.6 -gensim==4.3.2 -h5py==3.1.0 -khanaa==0.0.6 +gensim==4.3.3 +h5py==3.12.1 +khanaa==0.1.1 nlpo3==1.3.0 -nltk==3.6.6 -numpy==1.22.* +nltk==3.9.1 +numpy==1.26.4 OSKut==1.3 -pandas==1.4.* -panphon==0.20.0 +pandas==1.5.3 +panphon==0.21.2 phunspell==0.1.6 protobuf==3.20.3 -pyicu==2.8 -python-crfsuite==0.9.9 -requests==2.31.* -sacremoses==0.0.41 +pyicu==2.14 +python-crfsuite==0.9.11 +requests==2.32.* +sacremoses==0.1.1 sefr_cut==1.1 -sentence-transformers==2.2.2 -sentencepiece==0.1.99 -spacy_thai==0.7.1 -spacy==3.5.* -spylls==0.1.5 +sentence-transformers==2.7.0 +sentencepiece==0.2.0 +spacy_thai==0.7.7 +spacy==3.8.2 +spylls==0.1.7 ssg==0.0.8 -symspellpy==6.7.7 -tensorflow==2.13.1 +symspellpy==6.7.8 +tensorflow==2.18.0 thai-nner==0.3 -tltk==1.6.8 +tltk==1.9.1 torch==1.13.1 -transformers==4.38.0 -ufal.chu-liu-edmonds==1.0.2 -wtpsplit==1.0.1 +transformers==4.46.0 +ufal.chu-liu-edmonds==1.0.3 +wtpsplit==1.3.0 wunsen==0.0.3 diff --git a/requirements.txt b/requirements.txt index 9daa11484..29ed0d935 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -PyYAML==5.4.1 -numpy==1.22.* +PyYAML==5.4.* +numpy==1.26.* python-crfsuite==0.9.* -requests==2.31.* +requests==2.32.* From 1a41072c614676637793e1bf160e327ef5f8df19 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 14:57:30 +0000 Subject: [PATCH 02/41] Remove backports.zoneinfo (already in 3.9) Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-ubuntu.yml | 1 - .github/workflows/test-windows.yml | 1 - appveyor.yml | 148 ----------------------------- pythainlp/util/date.py | 7 +- setup.py | 4 +- 5 
files changed, 3 insertions(+), 158 deletions(-) delete mode 100644 appveyor.yml diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index ca35a21ea..f108af82a 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -30,7 +30,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install backports.zoneinfo[tzdata] pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt pip install .[full] diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 9ea69aa76..5b367251e 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -47,7 +47,6 @@ jobs: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | python -m pip install --disable-pip-version-check --user --upgrade pip setuptools - python -m pip install backports.zoneinfo[tzdata] python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 954c027d3..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,148 +0,0 @@ -#---------------------------------# -# general configuration # -#---------------------------------# - -#skip_commits: -# message: /[skip ci]/ # skip if the commit message contains "(skip ci)" - -#---------------------------------# -# environment configuration # -#---------------------------------# - -image: Visual Studio 2019 - -skip_branch_with_pr: true - -# scripts that are called at very beginning, before repo cloning -init: - # If there is a newer build queued for the same PR, cancel this one. - # The AppVeyor 'rollout builds' option is supposed to serve the same - # purpose but it is problematic because it tends to cancel builds pushed - # directly to master instead of just PR builds (or the converse). 
- # credits: JuliaLang developers. - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw "There are newer queued builds for this pull request, skipping build." } - - - ps: | - If (($env:SKIP_NOTAG -eq "true") -and ($env:APPVEYOR_REPO_TAG -ne "true")) { - Write-Host "Skipping build, not at a tag." - Exit-AppveyorBuild - } - - ECHO %APPVEYOR_BUILD_WORKER_IMAGE% - - "ECHO Python %PYTHON_VERSION% (%PYTHON_ARCH%bit) from %PYTHON%" - - ECHO %PYTHONIOENCODING% - - ECHO %ICU_VERSION% - - ECHO "Installed SDKs:" - - ps: "ls C:/Python*" - - ps: "ls \"C:/Program Files (x86)/Microsoft SDKs/Windows\"" - -# fetch repository as zip archive -# https://www.appveyor.com/docs/how-to/repository-shallow-clone/ -shallow_clone: true - -# set clone depth -clone_depth: 5 # clone entire repository history if not defined - -environment: - global: - APPVEYOR_SAVE_CACHE_ON_ERROR: false - APPVEYOR_SKIP_FINALIZE_ON_EXIT: true - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" - PYTHONIOENCODING: "utf-8" - ICU_VERSION: "64.2" - DISTUTILS_USE_SDK: "1" - PYTHAINLP_DATA_DIR: "%LOCALAPPDATA%/pythainlp-data" - - matrix: - # - PYTHON: "C:/Python36" - # PYTHON_VERSION: "3.6" - # PYTHON_ARCH: "32" - # PYICU_PKG: "https://www.dropbox.com/s/pahorbq29y9cura/PyICU-2.3.1-cp36-cp36m-win32.whl?dl=1" - - - PYTHON: "C:\\Miniconda36-x64" - PYTHON_VERSION: "3.6" - PYTHON_ARCH: "64" - PYICU_PKG: "https://www.dropbox.com/s/7t0rrxwckqbgivi/PyICU-2.3.1-cp36-cp36m-win_amd64.whl?dl=1" - - # - PYTHON: "C:/Python37" - # PYTHON_VERSION: "3.7" - # PYTHON_ARCH: "32" - # PYICU_PKG: "https://www.dropbox.com/s/3xwdnwhdcu619x4/PyICU-2.3.1-cp37-cp37m-win32.whl?dl=1" - - # - PYTHON: "C:/Python37-x64" - # PYTHON_VERSION: "3.7" - # PYTHON_ARCH: "64" 
- # PYICU_PKG: "https://www.dropbox.com/s/le5dckc3231opqt/PyICU-2.3.1-cp37-cp37m-win_amd64.whl?dl=1" - - # - PYTHON: "C:\\Miniconda38-x64" - # PYTHON_VERSION: "3.8" - # PYTHON_ARCH: "64" - # PYICU_PKG: "https://www.dropbox.com/s/o6p2sj5z50iim1e/PyICU-2.3.1-cp38-cp38-win_amd64.whl?dl=1" - -matrix: - fast_finish: true - -#cache: -# - "%LOCALAPPDATA%/pip/Cache" -# - "%APPDATA%/nltk_data" -# - "%LOCALAPPDATA%/pythainlp-data" - -install: - - chcp 65001 - - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" -# - '"C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %PLATFORM%' -# - '"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" %PLATFORM%' - - '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" %PLATFORM%' - - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 } - - ECHO %PATH% - - python --version - - python -m pip install --disable-pip-version-check --user --upgrade pip setuptools - - python -m pip --version - - python -m pip install pyyaml - - python -m pip install -U "h5py>=2.10.0,<3" "tensorflow>=2.3.1,<3" deepcut - - python -m pip install %PYICU_PKG% - - conda install -y -c conda-forge fairseq - - conda remove --force -y pytorch - - python -m pip install torch==1.7.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - - python -m pip install -e .[full] - -#---------------------------------# -# build configuration # -#---------------------------------# - -platform: - - x64 - -# Skip .NET project specific build phase. -build: off - -#---------------------------------# -# tests configuration # -#---------------------------------# - -test_script: - - python -m unittest discover - -#---------------------------------# -# global handlers # -#---------------------------------# - -#on_success: -# # Remove old or huge cache files to hopefully not exceed the 1GB cache limit. 
-# # -# # If the cache limit is reached, the cache will not be updated (of not even -# # created in the first run). So this is a trade of between keeping the cache -# # current and having a cache at all. -# # NB: This is done only `on_success` since the cache in uploaded only on -# # success anyway. -# # Note: Cygwin is not available on Visual Studio 2019, can try Msys2. -# - "ECHO Remove old or huge cache" -# - C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -type f -mtime +360 -delete -# - C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -type f -size +50M -delete -# - C:\cygwin\bin\find "%LOCALAPPDATA%/pip" -empty -delete -# # Show size of cache -# - C:\cygwin\bin\du -hs "%LOCALAPPDATA%/pip/Cache" -# - C:\cygwin\bin\du -hs "%APPDATA%/nltk_data" -# - C:\cygwin\bin\du -hs "%LOCALAPPDATA%/pythainlp-data" diff --git a/pythainlp/util/date.py b/pythainlp/util/date.py index d2e03a11a..3d8c250b5 100644 --- a/pythainlp/util/date.py +++ b/pythainlp/util/date.py @@ -26,10 +26,7 @@ from typing import Union import re -try: - from zoneinfo import ZoneInfo -except ImportError: - from backports.zoneinfo import ZoneInfo +from zoneinfo import ZoneInfo thai_abbr_weekdays = ["จ", "อ", "พ", "พฤ", "ศ", "ส", "อา"] @@ -236,7 +233,7 @@ def thai_strptime( # 9, # 0, # 1, - # tzinfo=backports.zoneinfo.ZoneInfo(key='Asia/Bangkok') + # tzinfo=zoneinfo.ZoneInfo(key='Asia/Bangkok') # ) """ d = "" diff --git a/setup.py b/setup.py index 72897d889..7a9c1f5c2 100644 --- a/setup.py +++ b/setup.py @@ -38,8 +38,6 @@ requirements = [ "requests>=2.22.0", - "backports.zoneinfo; python_version<'3.9'", - "tzdata; sys_platform == 'win32'" ] extras = { @@ -170,7 +168,7 @@ url="https://github.com/PyThaiNLP/pythainlp", packages=find_packages(exclude=["tests", "tests.*"]), test_suite="tests", - python_requires=">=3.7", + python_requires=">=3.9", package_data={ "pythainlp": [ "corpus/*", From 92f2b31c92b82a24d706c5ef02fdb4b2014994f3 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:00:40 +0000 
Subject: [PATCH 03/41] Upgrade PyYAML Signed-off-by: Arthit Suriyawongkul --- docker_requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 017bf490a..50b7116cb 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -1,4 +1,4 @@ -PyYAML==5.4.1 +PyYAML==6.0.2 attacut==1.0.6 bpemb==0.3.6 deepcut==0.7.0.0 diff --git a/requirements.txt b/requirements.txt index 29ed0d935..ca1dab699 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -PyYAML==5.4.* +PyYAML==6.0.* numpy==1.26.* python-crfsuite==0.9.* requests==2.32.* From d6a41141ec957ce29f28ce611bbca82ccdb1de61 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:05:19 +0000 Subject: [PATCH 04/41] Use Python 3.10 because Torch 1.x torch==1.x Requires-Python >=3.7,<3.11 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 6 +++--- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 87d9d947e..9f53d37ab 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5.3.0 with: - python-version: '3.12' + python-version: '3.10' - name: Install dependencies env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index d1fb8fb81..824d4c04c 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.12] + python-version: [3.10] steps: - uses: 
actions/checkout@v4 diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index c6691a8db..04b915358 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.12] + python-version: [3.10] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 00b1367d4..39aeb1c9c 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, self-hosted] - python-version: [3.12] + python-version: [3.10] steps: - name: Checkout @@ -47,8 +47,8 @@ jobs: shell: bash -l {0} run: | source ~/miniconda3/etc/profile.d/conda.sh - conda create -y -n pythainlpwork312 python=3.12 - conda activate pythainlpwork312 + conda create -y -n pythainlpwork310 python=3.10 + conda activate pythainlpwork310 conda info conda list python -m pip install --upgrade pip diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index f108af82a..0a0760ed3 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.12] + python-version: [3.10] steps: - name: Checkout diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 5b367251e..81aee546c 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: [3.12] + python-version: [3.10] steps: - name: Checkout diff --git a/Dockerfile b/Dockerfile index 3a8b325eb..2361983f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 -FROM python:3.12-slim +FROM python:3.10-slim COPY . . 
From e24713fd2a524f815db6e8d554f8fe2865945ada Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:11:55 +0000 Subject: [PATCH 05/41] Remove redundant pip install Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/lint.yml | 6 ++++-- .github/workflows/pypi-publish.yml | 5 +++-- .github/workflows/pypi-test.yml | 6 +++--- .github/workflows/test-windows.yml | 4 ---- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 9f53d37ab..7f35c04ea 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -15,7 +15,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v5.3.0 + uses: actions/setup-python@v5 with: python-version: '3.10' - name: Install dependencies diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 770ae906a..82fbd0f77 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,8 +20,10 @@ jobs: ruff: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - - uses: astral-sh/ruff-action@v1 + - name: Checkout + uses: actions/checkout@v4 + - name: Ruff + uses: astral-sh/ruff-action@v1 with: src: "./pythainlp" args: check --verbose --line-length 79 --select C901 diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 824d4c04c..a2678e540 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -13,9 +13,10 @@ jobs: python-version: [3.10] steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 
04b915358..82d4d7c70 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -13,9 +13,10 @@ jobs: python-version: [3.10] steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -23,7 +24,6 @@ jobs: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | python -m pip install --upgrade pip - pip install deepcut tltk SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 81aee546c..3a5ef5a48 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -37,10 +37,6 @@ jobs: run: | conda info conda list - - name: Install PyTorch - shell: powershell - run: | - pip install torch==1.8.1 - name: Install dependencies shell: powershell env: From c851a4a05fc519d035cb3c91150dce3b49bf64f3 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:16:40 +0000 Subject: [PATCH 06/41] specify the patch version for Python setup on Ubuntu Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 0a0760ed3..9c5940157 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.10] + python-version: [3.10.15] # Due an issue here https://github.com/actions/setup-python/issues/401, has to specify the patch version steps: - name: Checkout From 
6850498de6e6e7d2ff7e9bf33d37917729b0f3b8 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:22:00 +0000 Subject: [PATCH 07/41] Use ubuntu-22.04 Due an issue here https://github.com/actions/setup-python/issues/401, cannot use ubuntu-24.04 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-ubuntu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 9c5940157..903573686 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -15,10 +15,10 @@ on: jobs: build: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use ubuntu-24.04 strategy: matrix: - python-version: [3.10.15] # Due an issue here https://github.com/actions/setup-python/issues/401, has to specify the patch version + python-version: [3.10] steps: - name: Checkout From 7fa39c441406a6e06bfdc0be5cb01c04bd6413b8 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:23:34 +0000 Subject: [PATCH 08/41] Use Python 3.9 on Ubutnu Due an issue here https://github.com/actions/setup-python/issues/401 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-ubuntu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 903573686..ee2145729 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -15,10 +15,10 @@ on: jobs: build: - runs-on: ubuntu-22.04 # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use ubuntu-24.04 + runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.10] + python-version: [3.9] # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use 3.10 steps: - name: Checkout From c02d387906dcafeb80276f5d25ab0005351a86ce Mon Sep 17 00:00:00 2001 From: Arthit 
Suriyawongkul Date: Sun, 27 Oct 2024 15:36:11 +0000 Subject: [PATCH 09/41] Try using container for Python 3.10 on Ubuntu isntead Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-ubuntu.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index ee2145729..7ff8fe489 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -16,17 +16,19 @@ jobs: build: runs-on: ubuntu-24.04 - strategy: - matrix: - python-version: [3.9] # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use 3.10 + container: + image: python:3.10-slim +# strategy: +# matrix: +# python-version: [3.10] # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use 3.10 steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v5 +# with: +# python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip From ab9d81973c9b4a0a51a264f75d66d5b6cc50e8a5 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 15:41:49 +0000 Subject: [PATCH 10/41] 3.10 has to be inside quote as "3.10" Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 16 +++++++--------- .github/workflows/test-windows.yml | 2 +- 6 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 7f35c04ea..e31c2e229 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -17,7 
+17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: "3.10" - name: Install dependencies env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index a2678e540..d9bae1103 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.10] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 82d4d7c70..0b5ed6353 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.10] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 39aeb1c9c..70f69a1f5 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, self-hosted] - python-version: [3.10] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 7ff8fe489..08f93b4a3 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -16,19 +16,17 @@ jobs: build: runs-on: ubuntu-24.04 - container: - image: python:3.10-slim -# strategy: -# matrix: -# python-version: [3.10] # Due an issue here https://github.com/actions/setup-python/issues/401, cannot use 3.10 + strategy: + matrix: + python-version: ["3.10"] steps: - name: Checkout uses: actions/checkout@v4 -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v5 -# with: -# python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + 
python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 3a5ef5a48..c5756c205 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: [3.10] + python-version: ["3.10"] steps: - name: Checkout From 3b5020aaff0ca48cc2e2c6fbd90539606540a912 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 16:10:26 +0000 Subject: [PATCH 11/41] Use pip==24.0 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index e31c2e229..ece7f75c8 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install deepcut diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index d9bae1103..5a5b0f664 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 pip install setuptools wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to 
PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 0b5ed6353..1acddfb24 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 70f69a1f5..522f782ee 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -51,7 +51,7 @@ jobs: conda activate pythainlpwork310 conda info conda list - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt conda install -y -c conda-forge protobuf pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 08f93b4a3..9b95bc092 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt pip install .[full] diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index c5756c205..2d5828bb7 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -42,7 +42,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user 
--upgrade pip setuptools + python -m pip install --disable-pip-version-check --user --upgrade pip==24.0 setuptools python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq diff --git a/Dockerfile b/Dockerfile index 2361983f3..b40e6f5f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip setuptools +RUN pip3 install --upgrade pip==24.0 setuptools RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 48085e72b57d4a208d5dc53cb8c410fed8fd4eba Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 16:22:43 +0000 Subject: [PATCH 12/41] Use setuptools<65 Follow https://stackoverflow.com/questions/79063140/modulenotfounderror-no-module-named-distutils-msvccompiler-when-trying-to-ins Signed-off-by: Arthit Suriyawongkul --- .github/workflows/pypi-publish.yml | 4 ++-- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 5a5b0f664..6a8f2ab6b 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,8 +21,8 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip==24.0 - pip install setuptools wheel twine + python -m pip install --upgrade pip==24.0 setuptools<65 + pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 2d5828bb7..7e1e68ca0 100644 --- a/.github/workflows/test-windows.yml +++
b/.github/workflows/test-windows.yml @@ -42,7 +42,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade pip==24.0 setuptools + python -m pip install --disable-pip-version-check --user --upgrade pip==24.0 setuptools<65 python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq diff --git a/Dockerfile b/Dockerfile index b40e6f5f8..83507f358 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip==24.0 setuptools +RUN pip3 install --upgrade pip==24.0 setuptools<65 RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 89911b0686a381bed30b20ea3a048ba437bf642b Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 16:26:03 +0000 Subject: [PATCH 13/41] Use setuptools<65 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 4 ++-- .github/workflows/test-ubuntu.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 1acddfb24..7e90600c4 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade pip==24.0 + python -m pip install --upgrade pip==24.0 setuptools<65 SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml 
index 522f782ee..b4fc5584e 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -51,7 +51,7 @@ jobs: conda activate pythainlpwork310 conda info conda list - python -m pip install --upgrade pip==24.0 + python -m pip install --upgrade pip==24.0 setuptools<65 SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt conda install -y -c conda-forge protobuf pip install pytest coverage coveralls @@ -70,7 +70,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip==24.0 setuptools<65 pip install pytest coverage coveralls conda install -c conda-forge icu conda install -c conda-forge pyicu diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 9b95bc092..13fda6bbf 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip==24.0 + python -m pip install --upgrade pip==24.0 setuptools<65 pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt pip install .[full] From 1351a02f1a6764213bf50d5091a1554971e030b8 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 16:32:16 +0000 Subject: [PATCH 14/41] Put quotes around versions in pip install cmd line Signed-off-by: Arthit Suriyawongkul --- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 4 ++-- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 6a8f2ab6b..dcc1b7802 100644 --- a/.github/workflows/pypi-publish.yml +++
b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip==24.0 setuptools<65 + python -m pip install --upgrade "pip==24.0" "setuptools<65" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 7e90600c4..c2dd596f6 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade pip==24.0 setuptools<65 + python -m pip install --upgrade "pip==24.0" "setuptools<65" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index b4fc5584e..0c475b98d 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -51,7 +51,7 @@ jobs: conda activate pythainlpwork310 conda info conda list - python -m pip install --upgrade pip==24.0 setuptools<65 + python -m pip install --upgrade "pip==24.0" "setuptools<65" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt conda install -y -c conda-forge protobuf pip install pytest coverage coveralls @@ -70,7 +70,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade pip==24.0 setuptools<65 + python -m pip install --upgrade "pip==24.0" "setuptools<65" pip install pytest coverage coveralls conda install -c conda-forge icu conda install -c conda-forge pyicu diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 13fda6bbf..f289d8952 100644 --- 
a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip==24.0 setuptools<65 + python -m pip install --upgrade "pip==24.0" "setuptools<65" pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt pip install .[full] diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 7e1e68ca0..d91399811 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -42,7 +42,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade pip==24.0 setuptools<65 + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools<65" python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq diff --git a/Dockerfile b/Dockerfile index 83507f358..2a2593c93 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . 
RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip==24.0 setuptools<65 +RUN pip3 install --upgrade "pip==24.0" "setuptools<65" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 6f89046751ebd80dab02730f41e982a8a29b1477 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 16:37:30 +0000 Subject: [PATCH 15/41] Use setuptools<65 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 3 +-- .github/workflows/test-windows.yml | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index ece7f75c8..b1b5adc89 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,10 +22,9 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade pip==24.0 + python -m pip install --upgrade "pip==24.0" "setuptools<65" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi - pip install deepcut pip install .[full] pip install boto smart_open sphinx sphinx-rtd-theme python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index d91399811..61655cbc4 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -46,6 +46,7 @@ jobs: python -m pip --version python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq + conda install "setuptools <65" python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader omw-1.4 From 741479cff7ebac7d45b658941f14ac5c62b4d85f Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 23:38:02 +0000 
Subject: [PATCH 16/41] Try setuptools==65.7.0 Signed-off-by: Arthit Suriyawongkul --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 4 ++-- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 5 +++-- Dockerfile | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index b1b5adc89..de4f123eb 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index dcc1b7802..dbc8b9e5e 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index c2dd596f6..72b917d88 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r 
https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 0c475b98d..0b98d98d4 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -51,7 +51,7 @@ jobs: conda activate pythainlpwork310 conda info conda list - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt conda install -y -c conda-forge protobuf pip install pytest coverage coveralls @@ -70,7 +70,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" pip install pytest coverage coveralls conda install -c conda-forge icu conda install -c conda-forge pyicu diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index f289d8952..ae13bbc86 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<65" + python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" pip install pytest coverage coveralls SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt pip install .[full] diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 61655cbc4..29585f686 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -42,11 +42,12 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" 
"setuptools<65" + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==65.7.0" python -m pip --version + python -m pip show setuptools python -m pip install pytest coverage coveralls conda install -y -c conda-forge fairseq - conda install "setuptools <65" + conda install "setuptools==65.7.0" python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader omw-1.4 diff --git a/Dockerfile b/Dockerfile index 2a2593c93..33bad7668 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools<65" +RUN pip3 install --upgrade "pip==24.0" "setuptools==65.7.0" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 47ce495c381c5cd25f77d89ac80ed028cae876f0 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 23:49:56 +0000 Subject: [PATCH 17/41] Remove macos-selfhosted Signed-off-by: Arthit Suriyawongkul --- .github/workflows/test-macos.yml | 48 ++++--------------------------- .github/workflows/test-ubuntu.yml | 8 ++++-- 2 files changed, 11 insertions(+), 45 deletions(-) diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 0b98d98d4..d47e3fa10 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-latest, self-hosted] + os: [macos-latest] python-version: ["3.10"] steps: @@ -33,52 +33,17 @@ jobs: python-version: ${{ matrix.python-version }} auto-activate-base: false auto-update-conda: true - if: matrix.os == 'macos-latest' -# - name: Install mac m1 -# run: | -# mkdir -p ~/miniconda3 -# wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.12.0-MacOSX-arm64.sh 
-# chmod +x Miniconda3-py38_4.12.0-MacOSX-arm64.sh -# bash Miniconda3-py38_4.12.0-MacOSX-arm64.sh -b -u -p ~/miniconda3 -# ~/miniconda3/bin/conda init bash -# ~/miniconda3/bin/conda init zsh -# if: matrix.os == 'self-hosted' - - name: Test PyThaiNLP - M1 - shell: bash -l {0} - run: | - source ~/miniconda3/etc/profile.d/conda.sh - conda create -y -n pythainlpwork310 python=3.10 - conda activate pythainlpwork310 - conda info - conda list - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" - SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt - conda install -y -c conda-forge protobuf - pip install pytest coverage coveralls - conda install -y -c conda-forge icu - conda install -y -c conda-forge pyicu - pip install .[full] - python -m nltk.downloader omw-1.4 - python -m pip cache purge - python -m unittest discover - if: matrix.os == 'self-hosted' - - shell: bash -l {0} - run: | - conda info - conda list - if: matrix.os == 'self-hosted' - name: Install dependencies shell: bash -l {0} run: | python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" - pip install pytest coverage coveralls - conda install -c conda-forge icu - conda install -c conda-forge pyicu - SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt - pip install .[full] + python -m pip --version + python -m pip show setuptools + python -m pip install pytest coverage coveralls + SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True python -m pip install -r docker_requirements.txt + python -m pip install .[full] python -m nltk.downloader omw-1.4 python -m pip cache purge - if: matrix.os != 'self-hosted' - name: Test shell: bash -l {0} env: @@ -87,4 +52,3 @@ jobs: run: | coverage run -m unittest discover coveralls - if: matrix.os != 'self-hosted' diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index ae13bbc86..070333520 100644 --- a/.github/workflows/test-ubuntu.yml +++ 
b/.github/workflows/test-ubuntu.yml @@ -30,9 +30,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" - pip install pytest coverage coveralls - SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt - pip install .[full] + python -m pip --version + python -m pip show setuptools + python -m pip install pytest coverage coveralls + SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True python -m pip install -r docker_requirements.txt + python -m pip install .[full] python -m nltk.downloader omw-1.4 python -m pip cache purge - name: Test From e4cfeb72adb1295feb6118c8b1552d149ea24fa4 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 23:52:55 +0000 Subject: [PATCH 18/41] pyicu==2.13 --- docker_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 50b7116cb..9533d13f3 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -19,7 +19,7 @@ pandas==1.5.3 panphon==0.21.2 phunspell==0.1.6 protobuf==3.20.3 -pyicu==2.14 +pyicu==2.13 python-crfsuite==0.9.11 requests==2.32.* sacremoses==0.1.1 From baec6464be257b222b444d9a836d264b9261722e Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 27 Oct 2024 23:56:23 +0000 Subject: [PATCH 19/41] use setup-python instead of setup-miniconda --- .github/workflows/test-macos.yml | 4 +--- .github/workflows/test-windows.yml | 10 +--------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index d47e3fa10..3f453f855 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -28,11 +28,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: conda-incubator/setup-miniconda@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version
}} - auto-activate-base: false - auto-update-conda: true - name: Install dependencies shell: bash -l {0} run: | diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 29585f686..6b4ece11b 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -28,15 +28,9 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: conda-incubator/setup-miniconda@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - auto-activate-base: true - auto-update-conda: true - - shell: powershell - run: | - conda info - conda list - name: Install dependencies shell: powershell env: @@ -46,8 +40,6 @@ jobs: python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls - conda install -y -c conda-forge fairseq - conda install "setuptools==65.7.0" python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader omw-1.4 From dcc60e6fbb44ee1422c206121d0e659fb22cae1a Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:05:51 +0000 Subject: [PATCH 20/41] setuptools<60.0 --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index de4f123eb..99901167b 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --upgrade "pip==24.0" "setuptools<60.0" pip install pytest coverage coveralls if [ -f 
docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index dbc8b9e5e..31ec498a6 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --upgrade "pip==24.0" "setuptools<60.0" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 72b917d88..e6c2c2518 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --upgrade "pip==24.0" "setuptools<60.0" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 3f453f855..6383b6999 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --upgrade "pip==24.0" "setuptools<60.0" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 070333520..8e0a09c86 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: 
python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --upgrade "pip==24.0" "setuptools<60.0" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 6b4ece11b..532d94be6 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,7 +36,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==65.7.0" + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools<60.0" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/Dockerfile b/Dockerfile index 33bad7668..d0e4af748 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . 
RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools==65.7.0" +RUN pip3 install --upgrade "pip==24.0" "setuptools<60.0" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 2370b16bcb62708498f4e6d6cdb32c531c4f61b7 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:11:06 +0000 Subject: [PATCH 21/41] numpy==1.22.* --- docker_requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 9533d13f3..174a660b3 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -13,7 +13,7 @@ h5py==3.12.1 khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 -numpy==1.26.4 +numpy==1.22.* # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 OSKut==1.3 pandas==1.5.3 panphon==0.21.2 diff --git a/requirements.txt b/requirements.txt index ca1dab699..7d765bc3b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ PyYAML==6.0.* -numpy==1.26.* +numpy==1.22.* # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 python-crfsuite==0.9.* requests==2.32.* From 26eaa61e70b899198876e4876d46fc314602b733 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:21:58 +0000 Subject: [PATCH 22/41] Update docker_requirements.txt --- docker_requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 174a660b3..57256e031 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -13,7 +13,7 @@ h5py==3.12.1 khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 -numpy==1.22.* # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 +numpy==1.26.4 # See 
https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 OSKut==1.3 pandas==1.5.3 panphon==0.21.2 @@ -21,7 +21,7 @@ phunspell==0.1.6 protobuf==3.20.3 pyicu==2.13 python-crfsuite==0.9.11 -requests==2.32.* +requests==2.32.3 sacremoses==0.1.1 sefr_cut==1.1 sentence-transformers==2.7.0 From 4e01acdba0037cd7e24428365618976ad48379c6 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:26:18 +0000 Subject: [PATCH 23/41] remove deepcut --- docker_requirements.txt | 6 +++--- docs/api/tokenize.rst | 7 ------- pythainlp/tokenize/core.py | 19 ++++--------------- pythainlp/tokenize/deepcut.py | 35 ----------------------------------- tests/test_tokenize.py | 16 +--------------- 5 files changed, 8 insertions(+), 75 deletions(-) delete mode 100644 pythainlp/tokenize/deepcut.py diff --git a/docker_requirements.txt b/docker_requirements.txt index 57256e031..f4d2f99e8 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -1,7 +1,7 @@ PyYAML==6.0.2 attacut==1.0.6 bpemb==0.3.6 -deepcut==0.7.0.0 +#deepcut==0.7.0.0 emoji==0.5.4 epitran==1.25.1 esupar==1.7.5 @@ -13,7 +13,7 @@ h5py==3.12.1 khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 -numpy==1.26.4 # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 +numpy==1.22.4 # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 OSKut==1.3 pandas==1.5.3 panphon==0.21.2 @@ -31,7 +31,7 @@ spacy==3.8.2 spylls==0.1.7 ssg==0.0.8 symspellpy==6.7.8 -tensorflow==2.18.0 +#tensorflow==2.18.0 thai-nner==0.3 tltk==1.9.1 torch==1.13.1 diff --git a/docs/api/tokenize.rst b/docs/api/tokenize.rst index 1f42ab128..57c1263e4 100644 --- a/docs/api/tokenize.rst +++ b/docs/api/tokenize.rst @@ -80,13 +80,6 @@ Word level A tokenizer designed for word-level segmentation. It provides accurate word boundary detection in Thai text. -**deepcut** - -.. 
automodule:: pythainlp.tokenize.deepcut - :members: - - Utilizes deep learning techniques for word segmentation, achieving high accuracy and performance. - **multi_cut** .. automodule:: pythainlp.tokenize.multi_cut diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py index 993bcff64..636d709bb 100644 --- a/pythainlp/tokenize/core.py +++ b/pythainlp/tokenize/core.py @@ -141,9 +141,6 @@ def word_tokenize( * *attacut* - wrapper for `AttaCut `_., learning-based approach - * *deepcut* - wrapper for - `DeepCut `_, - learning-based approach * *icu* - wrapper for a word tokenizer in `PyICU `_., from ICU (International Components for Unicode), @@ -173,7 +170,7 @@ def word_tokenize( maximum collocation approach :Note: - The **custom_dict** parameter only works for \ - *deepcut*, *longest*, *newmm*, and *newmm-safe* engines. + *longest*, *newmm*, and *newmm-safe* engines. :Example: Tokenize text with different tokenizers:: @@ -260,14 +257,6 @@ def word_tokenize( from pythainlp.tokenize.multi_cut import segment segments = segment(text, custom_dict) - elif engine == "deepcut": # deepcut can optionally use dictionary - from pythainlp.tokenize.deepcut import segment - - if custom_dict: - custom_dict = list(custom_dict) - segments = segment(text, custom_dict) - else: - segments = segment(text) elif engine == "icu": from pythainlp.tokenize.pyicu import segment @@ -747,7 +736,7 @@ def __init__( used to create a trie, or an instantiated :class:`pythainlp.util.Trie` object. :param str engine: choose between different options of tokenizer engines - (i.e. *newmm*, *mm*, *longest*, *deepcut*) + (i.e. 
*newmm*, *mm*, *longest*) :param bool keep_whitespace: True to keep whitespace, a common mark for end of phrase in Thai """ @@ -757,7 +746,7 @@ def __init__( else: self.__trie_dict = DEFAULT_WORD_DICT_TRIE self.__engine = engine - if self.__engine not in ["newmm", "mm", "longest", "deepcut"]: + if self.__engine not in ["newmm", "mm", "longest"]: raise NotImplementedError( """ The Tokenizer class is not support %s for custom tokenizer @@ -788,6 +777,6 @@ def set_tokenize_engine(self, engine: str) -> None: Set the tokenizer's engine. :param str engine: choose between different options of tokenizer engines - (i.e. *newmm*, *mm*, *longest*, *deepcut*) + (i.e. *newmm*, *mm*, *longest*) """ self.__engine = engine diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py deleted file mode 100644 index 38178f344..000000000 --- a/pythainlp/tokenize/deepcut.py +++ /dev/null @@ -1,35 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project -# SPDX-License-Identifier: Apache-2.0 -""" -Wrapper for deepcut Thai word segmentation. deepcut is a -Thai word segmentation library using 1D Convolution Neural Network. - -User need to install deepcut (and its dependency: tensorflow) by themselves. 
- -:See Also: - * `GitHub repository `_ -""" - -from typing import List, Union - -try: - from deepcut import tokenize -except ImportError: - raise ImportError("Please install deepcut by pip install deepcut") -from pythainlp.util import Trie - - -def segment( - text: str, custom_dict: Union[Trie, List[str], str] = [] -) -> List[str]: - if not text or not isinstance(text, str): - return [] - - if custom_dict: - if isinstance(custom_dict, Trie): - custom_dict = list(custom_dict) - - return tokenize(text, custom_dict) - - return tokenize(text) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index ad5a1f5e9..ce3a52cde 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -8,7 +8,6 @@ DEFAULT_WORD_DICT_TRIE, Tokenizer, attacut, - deepcut, etcc, longest, multi_cut, @@ -455,7 +454,6 @@ def test_word_tokenize(self): ) self.assertIsNotNone(word_tokenize(self.text_1, engine="nlpo3")) self.assertIsNotNone(word_tokenize(self.text_1, engine="attacut")) - self.assertIsNotNone(word_tokenize(self.text_1, engine="deepcut")) self.assertIsNotNone(word_tokenize(self.text_1, engine="icu")) self.assertIsNotNone(word_tokenize(self.text_1, engine="longest")) self.assertIsNotNone(word_tokenize(self.text_1, engine="mm")) @@ -487,18 +485,6 @@ def test_attacut(self): attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-c") ) - def test_deepcut(self): - self.assertEqual(deepcut.segment(None), []) - self.assertEqual(deepcut.segment(""), []) - self.assertIsNotNone(deepcut.segment("ทดสอบ", DEFAULT_WORD_DICT_TRIE)) - self.assertIsNotNone(deepcut.segment("ทดสอบ", ["ทด", "สอบ"])) - self.assertIsNotNone(word_tokenize("ทดสอบ", engine="deepcut")) - self.assertIsNotNone( - word_tokenize( - "ทดสอบ", engine="deepcut", custom_dict=DEFAULT_WORD_DICT_TRIE - ) - ) - def test_etcc(self): self.assertEqual(etcc.segment(None), []) self.assertEqual(etcc.segment(""), []) @@ -842,7 +828,7 @@ def test_word_detokenize(self): ) def test_numeric_data_format(self): - engines = 
["attacut", "deepcut", "newmm", "sefr_cut"] + engines = ["attacut", "newmm", "sefr_cut"] for engine in engines: self.assertIn( From aa9d91e9b6c15bde63d731a1696fcae5f2054389 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:35:46 +0000 Subject: [PATCH 24/41] Update test-windows.yml --- .github/workflows/test-windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 532d94be6..40ce5eeb2 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -40,6 +40,8 @@ jobs: python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls + wget https://github.com/cgohlke/pyicu-build/releases/download/v2.13/PyICU-2.13-cp310-cp310-win_amd64.whl + python -m pip install PyICU-2.13-cp310-cp310-win_amd64.whl python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader omw-1.4 From c1b1212e5fc2f60829f2b01c92206262cee4e668 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:40:51 +0000 Subject: [PATCH 25/41] Update test-windows.yml --- .github/workflows/test-windows.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 40ce5eeb2..e3f96dbbd 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -40,8 +40,7 @@ jobs: python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls - wget https://github.com/cgohlke/pyicu-build/releases/download/v2.13/PyICU-2.13-cp310-cp310-win_amd64.whl - python -m pip install PyICU-2.13-cp310-cp310-win_amd64.whl + python -m pip install "https://github.com/cgohlke/pyicu-build/releases/download/v2.13/PyICU-2.13-cp310-cp310-win_amd64.whl" python -m pip install -r docker_requirements.txt python -m pip install .[full] python -m nltk.downloader 
omw-1.4 From ed5b45eff8931b11790149d1de5aa1709f047812 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:45:32 +0000 Subject: [PATCH 26/41] Remove OSKut --- docker_requirements.txt | 2 +- docs/api/tokenize.rst | 7 ------- docs/notes/installation.rst | 1 - pythainlp/tokenize/core.py | 7 ------- pythainlp/tokenize/oskut.py | 27 --------------------------- setup.py | 2 -- tests/test_tokenize.py | 12 ------------ 7 files changed, 1 insertion(+), 57 deletions(-) delete mode 100644 pythainlp/tokenize/oskut.py diff --git a/docker_requirements.txt b/docker_requirements.txt index f4d2f99e8..eded3bb4c 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -14,7 +14,7 @@ khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 numpy==1.22.4 # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 -OSKut==1.3 +#OSKut==1.3 pandas==1.5.3 panphon==0.21.2 phunspell==0.1.6 diff --git a/docs/api/tokenize.rst b/docs/api/tokenize.rst index 57c1263e4..663d0aa74 100644 --- a/docs/api/tokenize.rst +++ b/docs/api/tokenize.rst @@ -122,13 +122,6 @@ Word level An advanced word tokenizer for segmenting Thai text, with a focus on precision. -**oskut** - -.. automodule:: pythainlp.tokenize.oskut - :members: - - A tokenizer that uses a pre-trained model for word segmentation. It's a reliable choice for general-purpose text analysis. - **newmm (Default)** .. 
automodule:: pythainlp.tokenize.newmm diff --git a/docs/notes/installation.rst b/docs/notes/installation.rst index a22164a12..ddcbb90de 100644 --- a/docs/notes/installation.rst +++ b/docs/notes/installation.rst @@ -29,7 +29,6 @@ where ``extras`` can be - ``spell`` (to support phunspell & symspellpy) - ``generate`` (to support text generate with umlfit or thai2fit) - ``textaugment`` (to support text augmentation) - - ``oskut`` (to support OSKUT) - ``nlpo3`` (to support nlpo3 engine) - ``spacy_thai`` (to support spacy_thai engine) - ``esupar`` (to support esupar engine) diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py index 636d709bb..7a5079dc8 100644 --- a/pythainlp/tokenize/core.py +++ b/pythainlp/tokenize/core.py @@ -159,9 +159,6 @@ def word_tokenize( * *nlpo3* - wrapper for a word tokenizer in `nlpO3 `_., adaptation of newmm in Rust (2.5x faster) - * *oskut* - wrapper for - `OSKut `_., - Out-of-domain StacKed cut for Word Segmentation * *sefr_cut* - wrapper for `SEFR CUT `_., Stacked Ensemble Filter and Refine for Word Segmentation @@ -272,10 +269,6 @@ def word_tokenize( elif engine == "tltk": from pythainlp.tokenize.tltk import segment - segments = segment(text) - elif engine == "oskut": - from pythainlp.tokenize.oskut import segment - segments = segment(text) elif engine == "nlpo3": from pythainlp.tokenize.nlpo3 import segment diff --git a/pythainlp/tokenize/oskut.py b/pythainlp/tokenize/oskut.py deleted file mode 100644 index ffe9bc61f..000000000 --- a/pythainlp/tokenize/oskut.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project -# SPDX-License-Identifier: Apache-2.0 -""" -Wrapper OSKut (Out-of-domain StacKed cut for Word Segmentation). 
-Handling Cross- and Out-of-Domain Samples in Thai Word Segmentation -Stacked Ensemble Framework and DeepCut as Baseline model (ACL 2021 Findings) - -:See Also: - * `GitHub repository `_ -""" -from typing import List - -import oskut - -DEFAULT_ENGINE = "ws" -oskut.load_model(engine=DEFAULT_ENGINE) - - -def segment(text: str, engine: str = "ws") -> List[str]: - global DEFAULT_ENGINE - if not text or not isinstance(text, str): - return [] - if engine != DEFAULT_ENGINE: - DEFAULT_ENGINE = engine - oskut.load_model(engine=DEFAULT_ENGINE) - return oskut.OSKut(text) diff --git a/setup.py b/setup.py index 7a9c1f5c2..6d7b405cb 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,6 @@ "spylls>=0.1.5", "symspellpy>=6.7.6" ], - "oskut": ["oskut>=1.3"], "nlpo3": ["nlpo3>=1.2.2"], "onnx": [ "sentencepiece>=0.1.91", @@ -141,7 +140,6 @@ "phunspell>=0.1.6", "spylls>=0.1.5", "symspellpy>=6.7.6", - "oskut>=1.3", "nlpo3>=1.2.2", "onnxruntime>=1.10.0", "thai_nner", diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index ce3a52cde..96665f94e 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -13,7 +13,6 @@ multi_cut, nercut, newmm, - oskut, paragraph_tokenize, pyicu, sefr_cut, @@ -461,7 +460,6 @@ def test_word_tokenize(self): self.assertIsNotNone(word_tokenize(self.text_1, engine="newmm")) self.assertIsNotNone(word_tokenize(self.text_1, engine="sefr_cut")) self.assertIsNotNone(word_tokenize(self.text_1, engine="tltk")) - self.assertIsNotNone(word_tokenize(self.text_1, engine="oskut")) with self.assertRaises(ValueError): word_tokenize("หมอนทอง", engine="XX") # engine does not exist @@ -793,16 +791,6 @@ def test_sefr_cut(self): sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tnhc"), ) - def test_oskut(self): - self.assertEqual(oskut.segment(None), []) - self.assertEqual(oskut.segment(""), []) - self.assertIsNotNone( - oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"), - ) - self.assertIsNotNone( - oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", 
engine="scads"), - ) - def test_word_detokenize(self): self.assertEqual( word_detokenize(["ผม", "เลี้ยง", "5", "ตัว"]), "ผมเลี้ยง 5 ตัว" From f6522acf4a50e98d6a70ef0b5c1ed93f32f3feb0 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 00:49:44 +0000 Subject: [PATCH 27/41] Remove serf_cut --- docker_requirements.txt | 3 --- docs/api/tokenize.rst | 7 ------- pythainlp/tokenize/core.py | 7 ------- pythainlp/tokenize/sefr_cut.py | 26 -------------------------- setup.py | 2 -- tests/test_tokenize.py | 14 +------------- 6 files changed, 1 insertion(+), 58 deletions(-) delete mode 100644 pythainlp/tokenize/sefr_cut.py diff --git a/docker_requirements.txt b/docker_requirements.txt index eded3bb4c..0e8317fea 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -1,7 +1,6 @@ PyYAML==6.0.2 attacut==1.0.6 bpemb==0.3.6 -#deepcut==0.7.0.0 emoji==0.5.4 epitran==1.25.1 esupar==1.7.5 @@ -14,7 +13,6 @@ khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 numpy==1.22.4 # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 -#OSKut==1.3 pandas==1.5.3 panphon==0.21.2 phunspell==0.1.6 @@ -23,7 +21,6 @@ pyicu==2.13 python-crfsuite==0.9.11 requests==2.32.3 sacremoses==0.1.1 -sefr_cut==1.1 sentence-transformers==2.7.0 sentencepiece==0.2.0 spacy_thai==0.7.7 diff --git a/docs/api/tokenize.rst b/docs/api/tokenize.rst index 663d0aa74..10fc8b2be 100644 --- a/docs/api/tokenize.rst +++ b/docs/api/tokenize.rst @@ -115,13 +115,6 @@ Word level A tokenizer optimized for Named Entity Recognition (NER) tasks, ensuring accurate tokenization for entity recognition. -**sefr_cut** - -.. automodule:: pythainlp.tokenize.sefr_cut - :members: - - An advanced word tokenizer for segmenting Thai text, with a focus on precision. - **newmm (Default)** .. 
automodule:: pythainlp.tokenize.newmm diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py index 7a5079dc8..04b35e061 100644 --- a/pythainlp/tokenize/core.py +++ b/pythainlp/tokenize/core.py @@ -159,9 +159,6 @@ def word_tokenize( * *nlpo3* - wrapper for a word tokenizer in `nlpO3 `_., adaptation of newmm in Rust (2.5x faster) - * *sefr_cut* - wrapper for - `SEFR CUT `_., - Stacked Ensemble Filter and Refine for Word Segmentation * *tltk* - wrapper for `TLTK `_., maximum collocation approach @@ -261,10 +258,6 @@ def word_tokenize( elif engine == "nercut": from pythainlp.tokenize.nercut import segment - segments = segment(text) - elif engine == "sefr_cut": - from pythainlp.tokenize.sefr_cut import segment - segments = segment(text) elif engine == "tltk": from pythainlp.tokenize.tltk import segment diff --git a/pythainlp/tokenize/sefr_cut.py b/pythainlp/tokenize/sefr_cut.py deleted file mode 100644 index 34579e2eb..000000000 --- a/pythainlp/tokenize/sefr_cut.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project -# SPDX-License-Identifier: Apache-2.0 -""" -Wrapper for SEFR CUT Thai word segmentation. SEFR CUT is a -Thai Word Segmentation Models using Stacked Ensemble. 
- -:See Also: - * `GitHub repository `_ -""" -from typing import List - -import sefr_cut - -DEFAULT_ENGINE = "ws1000" -sefr_cut.load_model(engine=DEFAULT_ENGINE) - - -def segment(text: str, engine: str = "ws1000") -> List[str]: - global DEFAULT_ENGINE - if not text or not isinstance(text, str): - return [] - if engine != DEFAULT_ENGINE: - DEFAULT_ENGINE = engine - sefr_cut.load_model(engine=DEFAULT_ENGINE) - return sefr_cut.tokenize(text)[0] diff --git a/setup.py b/setup.py index 6d7b405cb..0ed4d26d4 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,6 @@ "wtp": ["transformers>=4.6.0", "wtpsplit>=1.0.1"], "wordnet": ["nltk>=3.3"], "generate": ["fastai<2.0"], - "sefr_cut": ["sefr_cut>=1.1"], "spell": [ "phunspell>=0.1.6", "spylls>=0.1.5", @@ -136,7 +135,6 @@ "fastai<2.0", "bpemb>=0.3.2", "transformers>=4.22.1", - "sefr_cut>=1.1", "phunspell>=0.1.6", "spylls>=0.1.5", "symspellpy>=6.7.6", diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index 96665f94e..f7600e0e7 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -15,7 +15,6 @@ newmm, paragraph_tokenize, pyicu, - sefr_cut, sent_tokenize, ssg, subword_tokenize, @@ -458,7 +457,6 @@ def test_word_tokenize(self): self.assertIsNotNone(word_tokenize(self.text_1, engine="mm")) self.assertIsNotNone(word_tokenize(self.text_1, engine="nercut")) self.assertIsNotNone(word_tokenize(self.text_1, engine="newmm")) - self.assertIsNotNone(word_tokenize(self.text_1, engine="sefr_cut")) self.assertIsNotNone(word_tokenize(self.text_1, engine="tltk")) with self.assertRaises(ValueError): @@ -781,16 +779,6 @@ def test_tcc_p(self): self.assertEqual(list(tcc_p.tcc("")), []) self.assertEqual(tcc_p.tcc_pos(""), set()) - def test_sefr_cut(self): - self.assertEqual(sefr_cut.segment(None), []) - self.assertEqual(sefr_cut.segment(""), []) - self.assertIsNotNone( - sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"), - ) - self.assertIsNotNone( - sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tnhc"), - ) - def 
test_word_detokenize(self): self.assertEqual( word_detokenize(["ผม", "เลี้ยง", "5", "ตัว"]), "ผมเลี้ยง 5 ตัว" @@ -816,7 +804,7 @@ def test_word_detokenize(self): ) def test_numeric_data_format(self): - engines = ["attacut", "newmm", "sefr_cut"] + engines = ["attacut", "newmm"] for engine in engines: self.assertIn( From dae8a6442b9aa4d17983f44518bdad99158ecdc1 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:02:52 +0000 Subject: [PATCH 28/41] setuptools==73.0.1 --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 99901167b..332ef8a2c 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 31ec498a6..32e97db20 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index 
e6c2c2518..da63959a6 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 6383b6999..87fe96310 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 8e0a09c86..54c3629b3 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index e3f96dbbd..5d1365b51 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,7 +36,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools<60.0" + python -m pip install 
--disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==73.0.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/Dockerfile b/Dockerfile index d0e4af748..837dcbfef 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools<60.0" +RUN pip3 install --upgrade "pip==24.0" "setuptools==73.0.1" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge From 6bacd77a29277b268b2d2d4d829f399d79f5eb3a Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:13:10 +0000 Subject: [PATCH 29/41] setuptools<60 in pyproject.toml --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- pyproject.toml | 4 ++++ 8 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 332ef8a2c..958f46539 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --upgrade "pip==24.0" "setuptools<60" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 32e97db20..2763c3af0 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml 
@@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --upgrade "pip==24.0" "setuptools<60" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index da63959a6..25bfc6036 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --upgrade "pip==24.0" "setuptools<60" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 87fe96310..024e6ecb6 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --upgrade "pip==24.0" "setuptools<60" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 54c3629b3..ef0556a29 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --upgrade "pip==24.0" "setuptools<60" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls 
diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 5d1365b51..00231619c 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,7 +36,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==73.0.1" + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools<60" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/Dockerfile b/Dockerfile index 837dcbfef..117a14f47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools==73.0.1" +RUN pip3 install --upgrade "pip==24.0" "setuptools<60" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge diff --git a/pyproject.toml b/pyproject.toml index faa102a79..ef37cc5a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,3 +17,7 @@ docstring-code-format = true # Flag errors (`C901`) whenever the complexity level exceeds 5. Default is 10. # We should aim to gradually reduce this to 10. 
max-complexity = 40 + +[build-system] +requires = ["setuptools<60"] +build-backend = "setuptools.build_meta" From 4e72d6b64196b16892d39697f980201a26b03101 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:17:18 +0000 Subject: [PATCH 30/41] sentence-transformers==3.2.1 --- docker_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 0e8317fea..43947a17e 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -21,7 +21,7 @@ pyicu==2.13 python-crfsuite==0.9.11 requests==2.32.3 sacremoses==0.1.1 -sentence-transformers==2.7.0 +sentence-transformers==3.2.1 sentencepiece==0.2.0 spacy_thai==0.7.7 spacy==3.8.2 From 47d67720fa8ebab21f5ff486a4b0aee949103e87 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:25:21 +0000 Subject: [PATCH 31/41] numpy==2.1.2 --- docker_requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_requirements.txt b/docker_requirements.txt index 43947a17e..099e6a577 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -12,7 +12,7 @@ h5py==3.12.1 khanaa==0.1.1 nlpo3==1.3.0 nltk==3.9.1 -numpy==1.22.4 # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 +numpy==2.1.2 pandas==1.5.3 panphon==0.21.2 phunspell==0.1.6 diff --git a/requirements.txt b/requirements.txt index 7d765bc3b..c9620fc0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ PyYAML==6.0.* -numpy==1.22.* # See https://github.com/numpy/numpy/issues/22135#issuecomment-1220383873 +numpy==2.1.* python-crfsuite==0.9.* requests==2.32.* From d871fdb4ad4b6640a1f2bd0fbd1dcaa3bb89a341 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:54:25 +0000 Subject: [PATCH 32/41] gensim==4.3.2 --- docker_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_requirements.txt 
b/docker_requirements.txt index 099e6a577..8438c662c 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -7,7 +7,7 @@ esupar==1.7.5 fairseq==0.12.2 fastai==1.0.61 fastcoref==2.1.6 -gensim==4.3.3 +gensim==4.3.2 # https://github.com/piskvorky/gensim/issues/3560 h5py==3.12.1 khanaa==0.1.1 nlpo3==1.3.0 From 361f0ff2d84520c4d815a1a09215395013da2508 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 06:59:47 +0000 Subject: [PATCH 33/41] setuptools==65.5.1 --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- pyproject.toml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 958f46539..112ed69d7 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60" + python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 2763c3af0..519c01c48 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60" + python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml 
b/.github/workflows/pypi-test.yml index 25bfc6036..b0241b608 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60" + python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 024e6ecb6..41d061f50 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60" + python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index ef0556a29..59b014908 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools<60" + python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 00231619c..1006c1e05 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,7 +36,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" 
"setuptools<60" + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==65.5.1" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/Dockerfile b/Dockerfile index 117a14f47..1bc5f30c0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools<60" +RUN pip3 install --upgrade "pip==24.0" "setuptools==65.5.1" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge diff --git a/pyproject.toml b/pyproject.toml index ef37cc5a6..1d2430352 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,5 +19,5 @@ docstring-code-format = true max-complexity = 40 [build-system] -requires = ["setuptools<60"] +requires = ["setuptools==65.5.1"] build-backend = "setuptools.build_meta" From 13820147bddacadf351c6f767b06c087335b71fe Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 07:04:47 +0000 Subject: [PATCH 34/41] Update test-ubuntu.yml --- .github/workflows/test-ubuntu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 59b014908..4622d684d 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -30,6 +30,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install "numpy==2.1.2" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls From e2815e2d0b2036fc4342cd5a02b5f3d432a19d90 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 07:15:00 +0000 Subject: [PATCH 35/41] scikit-learn no longer need upper limit on setuptools --- 
.github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 4 ++-- Dockerfile | 2 +- pyproject.toml | 4 ++-- setup.py | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 112ed69d7..0baee703e 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -22,7 +22,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --upgrade "pip==24.0" "setuptools" pip install pytest coverage coveralls if [ -f docker_requirements.txt ]; then pip install -r docker_requirements.txt; fi pip install .[full] diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 519c01c48..d3ec7d878 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -21,7 +21,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --upgrade "pip==24.0" "setuptools" pip install wheel twine python setup.py sdist bdist_wheel - name: Publish a Python distribution to PyPI diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index b0241b608..a10773537 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -23,7 +23,7 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --upgrade "pip==24.0" "setuptools" SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r 
https://raw.githubusercontent.com/PyThaiNLP/pythainlp/dev/docker_requirements.txt pip install pythainlp[full] python -m nltk.downloader omw-1.4 diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 41d061f50..8dff818f2 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies shell: bash -l {0} run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --upgrade "pip==24.0" "setuptools" python -m pip --version python -m pip show setuptools python -m pip install pytest coverage coveralls diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 4622d684d..01042853a 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -29,7 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --upgrade "pip==24.0" "setuptools" python -m pip install "numpy==2.1.2" python -m pip --version python -m pip show setuptools diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 1006c1e05..420cd191a 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -36,9 +36,9 @@ jobs: env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True run: | - python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools==65.5.1" + python -m pip install --disable-pip-version-check --user --upgrade "pip==24.0" "setuptools" python -m pip --version - python -m pip show setuptools + python -m pip show setuptools==65.5.1 python -m pip install pytest coverage coveralls python -m pip install "https://github.com/cgohlke/pyicu-build/releases/download/v2.13/PyICU-2.13-cp310-cp310-win_amd64.whl" python -m pip install -r docker_requirements.txt diff --git a/Dockerfile 
b/Dockerfile index 1bc5f30c0..06575130c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,6 @@ COPY . . RUN apt-get update && apt-get install -y --no-install-recommends build-essential libicu-dev libicu63 pkg-config && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade "pip==24.0" "setuptools==65.5.1" +RUN pip3 install --upgrade "pip==24.0" "setuptools" RUN if [ -f docker_requirements.txt ]; then pip3 install -r docker_requirements.txt; fi RUN pip3 install -e .[full] && pip3 cache purge diff --git a/pyproject.toml b/pyproject.toml index 1d2430352..d6f4c19ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.ruff] line-length = 79 indent-width = 4 -target-version = "py38" +target-version = "py310" [tool.ruff.format] quote-style = "double" @@ -19,5 +19,5 @@ docstring-code-format = true max-complexity = 40 [build-system] -requires = ["setuptools==65.5.1"] +requires = ["setuptools"] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 0ed4d26d4..01d677abd 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ https://github.com/PyThaiNLP/pythainlp """ -from setuptools import find_packages, setup +from setuptools==65.5.1 import find_packages, setup readme = """ ![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4) From 5367b78358432d66fe805cf368660d32c7a00ca4 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 07:17:32 +0000 Subject: [PATCH 36/41] Update pyproject.toml --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d6f4c19ed..f30ea6242 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,3 @@ docstring-code-format = true # Flag errors (`C901`) whenever the complexity level exceeds 5. Default is 10. # We should aim to gradually reduce this to 10. 
max-complexity = 40 - -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" From d11eb0ea303b9ba2b29d00feacd2fafcb3755db7 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 09:12:03 +0000 Subject: [PATCH 37/41] Try Python 3.9 --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- Dockerfile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 0baee703e..903f8c29a 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.9" - name: Install dependencies env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index d3ec7d878..fb7e812c6 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.10"] + python-version: ["3.9"] steps: - name: Checkout diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index a10773537..b1e425857 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.10"] + python-version: ["3.9"] steps: - name: Checkout diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 8dff818f2..c0c6f6e4f 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [macos-latest] - python-version: ["3.10"] + python-version: ["3.9"] 
steps: - name: Checkout diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index 01042853a..e8fd70893 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.10"] + python-version: ["3.9"] steps: - name: Checkout diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 420cd191a..887dcda20 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: ["3.10"] + python-version: ["3.9"] steps: - name: Checkout diff --git a/Dockerfile b/Dockerfile index 06575130c..9e26fd6f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 -FROM python:3.10-slim +FROM python:3.9-slim COPY . . From 3317655a89c1355782fc8880f32b5325c4cad77e Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 09:15:16 +0000 Subject: [PATCH 38/41] Back to Python 3.10 --- .github/workflows/deploy_docs.yml | 2 +- .github/workflows/pypi-publish.yml | 2 +- .github/workflows/pypi-test.yml | 2 +- .github/workflows/test-macos.yml | 2 +- .github/workflows/test-ubuntu.yml | 2 +- .github/workflows/test-windows.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 903f8c29a..0baee703e 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.9" + python-version: "3.10" - name: Install dependencies env: SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index fb7e812c6..d3ec7d878 100644 --- 
a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index b1e425857..a10773537 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index c0c6f6e4f..8dff818f2 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [macos-latest] - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/test-ubuntu.yml b/.github/workflows/test-ubuntu.yml index e8fd70893..01042853a 100644 --- a/.github/workflows/test-ubuntu.yml +++ b/.github/workflows/test-ubuntu.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index 887dcda20..420cd191a 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [windows-latest] - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout From 1d67d92b18e76ef5966203d9c0b6a6db2d2e06a3 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 09:58:24 +0000 Subject: [PATCH 39/41] Update docker_requirements.txt --- docker_requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docker_requirements.txt b/docker_requirements.txt index 8438c662c..84ee6b57b 100644 --- a/docker_requirements.txt +++ b/docker_requirements.txt @@ -21,6 +21,7 
@@ pyicu==2.13 python-crfsuite==0.9.11 requests==2.32.3 sacremoses==0.1.1 +scikit-learn==1.5.2 sentence-transformers==3.2.1 sentencepiece==0.2.0 spacy_thai==0.7.7 From b50f37db7c25a142bbfd93fcb958fdc6db217bbe Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Mon, 28 Oct 2024 10:08:44 +0000 Subject: [PATCH 40/41] brew install icu4c on macOS --- .github/workflows/test-macos.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml index 8dff818f2..647b21846 100644 --- a/.github/workflows/test-macos.yml +++ b/.github/workflows/test-macos.yml @@ -31,6 +31,16 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install the ICU library + run: | + brew install icu4c + PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig + echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}" >> "${GITHUB_ENV}" + - name: Determine the ICU version + run: | + ICU_VER=$(pkg-config --modversion icu-i18n) + echo "ICU_VER=${ICU_VER}" + echo "ICU_VER=${ICU_VER}" >> "${GITHUB_ENV}" - name: Install dependencies shell: bash -l {0} run: | From baab223034a0b068debcff6405ebaef6879dc6fd Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Tue, 29 Oct 2024 11:42:32 +0000 Subject: [PATCH 41/41] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 01d677abd..0ed4d26d4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ https://github.com/PyThaiNLP/pythainlp """ -from setuptools==65.5.1 import find_packages, setup +from setuptools import find_packages, setup readme = """ ![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4)