Skip to content

Commit 4033085

Browse files
authored
Merge pull request #952 from bact/add-test-suites
Specify a limited test suite
2 parents 89ea62e + 6d8fe2f commit 4033085

File tree

117 files changed

+1349
-1155
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+1349
-1155
lines changed

.github/workflows/unittest.yml

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Unit test and code coverage
1+
name: Unit test and coverage
22

33
on:
44
push:
@@ -18,9 +18,14 @@ jobs:
1818
fail-fast: false
1919
matrix:
2020
os: ["macos-latest", "ubuntu-latest", "windows-latest"]
21-
python-version: ["3.10"]
21+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
2222

2323
runs-on: ${{ matrix.os }}
24+
env:
25+
PYICU_WIN_VER: 2.14
26+
INSTALL_PYICU_WIN: false
27+
INSTALL_TORCH: false
28+
INSTALL_FULL_DEPS: false
2429

2530
steps:
2631
- name: Checkout
@@ -32,8 +37,11 @@ jobs:
3237
cache: "pip"
3338
- name: Install build tools
3439
run: |
35-
python -m pip install --upgrade "pip<24.1" "setuptools==73.0.1"
36-
python -m pip install coverage coveralls
40+
pip install --upgrade "pip<24.1" "setuptools>=65.0.2,<=73.0.1"
41+
pip install coverage coveralls
42+
# pip<24.1 because https://github.com/omry/omegaconf/pull/1195
43+
# setuptools>=65.0.2 because https://github.com/pypa/setuptools/commit/d03da04e024ad4289342077eef6de40013630a44#diff-9ea6e1e3dde6d4a7e08c7c88eceed69ca745d0d2c779f8f85219b22266efff7fR1
44+
# setuptools<=73.0.1 because https://github.com/pypa/setuptools/issues/4620
3745
- name: Install ICU (macOS)
3846
if: startsWith(matrix.os, 'macos-')
3947
run: |
@@ -43,26 +51,38 @@ jobs:
4351
ICU_VER=$(pkg-config --modversion icu-i18n)
4452
echo "ICU_VER=${ICU_VER}"
4553
echo "ICU_VER=${ICU_VER}" >> "${GITHUB_ENV}"
46-
- name: Install ICU (Windows)
47-
if: startsWith(matrix.os, 'windows-')
54+
- name: Install PyICU (Windows)
55+
if: startsWith(matrix.os, 'windows-') && env.INSTALL_PYICU_WIN == 'true'
56+
shell: powershell
4857
run: |
49-
python -m pip install "https://github.com/cgohlke/pyicu-build/releases/download/v2.14/PyICU-2.14-cp310-cp310-win_amd64.whl"
50-
# if needed, get pip wheel link from https://github.com/cgohlke/pyicu-build/releases
58+
$PYTHON_WIN_VER = "${{ matrix.python-version }}"
59+
$CP_VER = "cp" + $PYTHON_WIN_VER.Replace(".", "")
60+
$WHEEL_URL = "https://github.com/cgohlke/pyicu-build/releases/download/v${{ env.PYICU_WIN_VER }}/PyICU-${{ env.PYICU_WIN_VER }}-${CP_VER}-${CP_VER}-win_amd64.whl"
61+
pip install "$WHEEL_URL"
62+
# Get wheel URL from https://github.com/cgohlke/pyicu-build/releases
5163
- name: Install PyTorch
64+
if: env.INSTALL_TORCH == 'true'
5265
run: pip install torch
53-
# if needed, get pip wheel link from http://download.pytorch.org/whl/torch/
54-
# - name: Install dependencies
55-
# env:
56-
# SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True
57-
# run: |
58-
# python -m pip install -r docker_requirements.txt
66+
# If torch for the platform is not available in PyPI, use this command:
67+
# pip install "<torch_wheel_url>"
68+
# Get wheel URL from http://download.pytorch.org/whl/torch/
69+
- name: Install dependencies
70+
if: env.INSTALL_FULL_DEPS == 'true'
71+
env:
72+
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True
73+
run: pip install -r docker_requirements.txt
5974
- name: Install PyThaiNLP
60-
run: |
61-
python -m pip install .
62-
- name: Test
75+
run: pip install .
76+
# Use the command below if you want to install a small set of external
77+
# packages, which includes numpy, pyicu, python-crfsuite, and requests:
78+
# pip install .[compact]
79+
- name: Unit test and code coverage
80+
run: coverage run -m unittest tests
81+
# Use 'unittest tests' instead of 'unittest discover' to avoid loading
82+
# tests with external imports.
83+
# The test cases to load are defined in __init__.py in the tests directory.
84+
- name: Coverage report
6385
env:
6486
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6587
COVERALLS_SERVICE_NAME: github
66-
run: |
67-
coveralls
68-
# coverage run -m unittest discover
88+
run: coveralls

pythainlp/ancient/aksonhan.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# -*- coding: utf-8 -*-
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
4-
from pythainlp.util import Trie
54
from pythainlp import thai_consonants, thai_tonemarks
6-
from pythainlp.tokenize import Tokenizer
75
from pythainlp.corpus import thai_orst_words
8-
6+
from pythainlp.tokenize import Tokenizer
7+
from pythainlp.util import Trie
98

109
_dict_aksonhan = {}
1110
for i in list(thai_consonants):

pythainlp/augment/lm/fasttext.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
# SPDX-License-Identifier: Apache-2.0
44
import itertools
55
from typing import List, Tuple
6+
67
from gensim.models.fasttext import FastText as FastText_gensim
78
from gensim.models.keyedvectors import KeyedVectors
9+
810
from pythainlp.tokenize import word_tokenize
911

1012

pythainlp/augment/lm/phayathaibert.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,20 @@
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import List
65
import random
76
import re
7+
from typing import List
88

99
from pythainlp.phayathaibert.core import ThaiTextProcessor
1010

11-
1211
_MODEL_NAME = "clicknext/phayathaibert"
1312

1413

1514
class ThaiTextAugmenter:
1615
def __init__(self) -> None:
1716
from transformers import (
18-
AutoTokenizer,
1917
AutoModelForMaskedLM,
18+
AutoTokenizer,
2019
pipeline,
2120
)
2221

pythainlp/augment/word2vec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@
88
__all__ = ["Word2VecAug", "Thai2fitAug", "LTW2VAug"]
99

1010
from pythainlp.augment.word2vec.core import Word2VecAug
11-
from pythainlp.augment.word2vec.thai2fit import Thai2fitAug
1211
from pythainlp.augment.word2vec.ltw2v import LTW2VAug
12+
from pythainlp.augment.word2vec.thai2fit import Thai2fitAug

pythainlp/augment/word2vec/bpemb_wv.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
44
from typing import List, Tuple
5+
56
from pythainlp.augment.word2vec.core import Word2VecAug
67

78

pythainlp/augment/word2vec/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# -*- coding: utf-8 -*-
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import List, Tuple
54
import itertools
5+
from typing import List, Tuple
66

77

88
class Word2VecAug:

pythainlp/augment/word2vec/ltw2v.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
44
from typing import List, Tuple
5+
56
from pythainlp.augment.word2vec.core import Word2VecAug
67
from pythainlp.corpus import get_corpus_path
78
from pythainlp.tokenize import word_tokenize

pythainlp/augment/word2vec/thai2fit.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
44
from typing import List, Tuple
5+
56
from pythainlp.augment.word2vec.core import Word2VecAug
67
from pythainlp.corpus import get_corpus_path
78
from pythainlp.tokenize import THAI2FIT_TOKENIZER

pythainlp/augment/wordnet.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@
99
"postype2wordnet",
1010
]
1111

12-
from collections import OrderedDict
1312
import itertools
13+
from collections import OrderedDict
1414
from typing import List
1515

1616
from nltk.corpus import wordnet as wn
17+
1718
from pythainlp.corpus import wordnet
18-
from pythainlp.tokenize import word_tokenize
1919
from pythainlp.tag import pos_tag
20-
20+
from pythainlp.tokenize import word_tokenize
2121

2222
orchid = {
2323
"": "",

0 commit comments

Comments
 (0)