diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 1d9efae0d..2cc568663 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -1,4 +1,4 @@
-name: Unit test and code coverage
+name: Unit test and coverage
 
 on:
   push:
@@ -18,9 +18,14 @@ jobs:
       fail-fast: false
       matrix:
         os: ["macos-latest", "ubuntu-latest", "windows-latest"]
-        python-version: ["3.10"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
 
     runs-on: ${{ matrix.os }}
+    env:
+      PYICU_WIN_VER: "2.14"  # quoted: a bare version like 2.20 would load as 2.2
+      INSTALL_PYICU_WIN: "false"  # toggles are compared with the string 'true'
+      INSTALL_TORCH: "false"
+      INSTALL_FULL_DEPS: "false"
 
     steps:
     - name: Checkout
@@ -32,8 +37,11 @@ jobs:
         cache: "pip"
     - name: Install build tools
       run: |
-        python -m pip install --upgrade "pip<24.1" "setuptools==73.0.1"
-        python -m pip install coverage coveralls
+        pip install --upgrade "pip<24.1" "setuptools>=65.0.2,<=73.0.1"
+        pip install coverage coveralls
+      # pip<24.1 because https://github.com/omry/omegaconf/pull/1195
+      # setuptools>=65.0.2 because https://github.com/pypa/setuptools/commit/d03da04e024ad4289342077eef6de40013630a44#diff-9ea6e1e3dde6d4a7e08c7c88eceed69ca745d0d2c779f8f85219b22266efff7fR1
+      # setuptools<=73.0.1 because https://github.com/pypa/setuptools/issues/4620
     - name: Install ICU (macOS)
       if: startsWith(matrix.os, 'macos-')
       run: |
@@ -43,26 +51,38 @@ jobs:
         ICU_VER=$(pkg-config --modversion icu-i18n)
         echo "ICU_VER=${ICU_VER}"
         echo "ICU_VER=${ICU_VER}" >> "${GITHUB_ENV}"
-    - name: Install ICU (Windows)
-      if: startsWith(matrix.os, 'windows-')
+    - name: Install PyICU (Windows)
+      if: startsWith(matrix.os, 'windows-') && env.INSTALL_PYICU_WIN == 'true'
+      shell: powershell
       run: |
-        python -m pip install "https://github.com/cgohlke/pyicu-build/releases/download/v2.14/PyICU-2.14-cp310-cp310-win_amd64.whl"
-      # if needed, get pip wheel link from https://github.com/cgohlke/pyicu-build/releases
+        $PYTHON_WIN_VER = "${{ matrix.python-version }}"
+        $CP_VER = "cp" + $PYTHON_WIN_VER.Replace(".", "")
+        $WHEEL_URL = "https://github.com/cgohlke/pyicu-build/releases/download/v${{ env.PYICU_WIN_VER }}/PyICU-${{ env.PYICU_WIN_VER }}-${CP_VER}-${CP_VER}-win_amd64.whl"
+        pip install "$WHEEL_URL"
+      # Get wheel URL from https://github.com/cgohlke/pyicu-build/releases
     - name: Install PyTorch
+      if: env.INSTALL_TORCH == 'true'
       run: pip install torch
-      # if needed, get pip wheel link from http://download.pytorch.org/whl/torch/
-    # - name: Install dependencies
-    #   env:
-    #     SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: True
-    #   run: |
-    #     python -m pip install -r docker_requirements.txt
+      # If torch for the platform is not available in PyPI, use this command:
+      # pip install "<torch_wheel_url>"
+      # Get wheel URL from https://download.pytorch.org/whl/torch/
+    - name: Install dependencies
+      if: env.INSTALL_FULL_DEPS == 'true'
+      env:
+        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL: "True"  # exact string
+      run: pip install -r docker_requirements.txt
     - name: Install PyThaiNLP
-      run: |
-        python -m pip install .
-    - name: Test
+      run: pip install .
+      # Use the command below, if you want to install a small set of external
+      # packages, which includes numpy, pyicu, python-crfsuite, and requests:
+      # pip install .[compact]
+    - name: Unit test and code coverage
+      run: coverage run -m unittest tests
+      # Use 'unittest tests' instead of 'unittest discover' to avoid loading
+      # tests with external imports.
+      # The test cases to load are defined in tests/__init__.py.
+    - name: Coverage report
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         COVERALLS_SERVICE_NAME: github
-      run: |
-        coveralls
-#        coverage run -m unittest discover
+      run: coveralls
diff --git a/pythainlp/ancient/aksonhan.py b/pythainlp/ancient/aksonhan.py
index 57389d1aa..2aad7851c 100644
--- a/pythainlp/ancient/aksonhan.py
+++ b/pythainlp/ancient/aksonhan.py
@@ -1,11 +1,10 @@
 # -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
-from pythainlp.util import Trie
 from pythainlp import thai_consonants, thai_tonemarks
-from pythainlp.tokenize import Tokenizer
 from pythainlp.corpus import thai_orst_words
-
+from pythainlp.tokenize import Tokenizer
+from pythainlp.util import Trie
 
 _dict_aksonhan = {}
 for i in list(thai_consonants):
diff --git a/pythainlp/augment/lm/fasttext.py b/pythainlp/augment/lm/fasttext.py
index 45b2b792f..4d4b99140 100644
--- a/pythainlp/augment/lm/fasttext.py
+++ b/pythainlp/augment/lm/fasttext.py
@@ -3,8 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 import itertools
 from typing import List, Tuple
+
 from gensim.models.fasttext import FastText as FastText_gensim
 from gensim.models.keyedvectors import KeyedVectors
+
 from pythainlp.tokenize import word_tokenize
 
 
diff --git a/pythainlp/augment/lm/phayathaibert.py b/pythainlp/augment/lm/phayathaibert.py
index ad9af5c67..60a8d1499 100644
--- a/pythainlp/augment/lm/phayathaibert.py
+++ b/pythainlp/augment/lm/phayathaibert.py
@@ -2,21 +2,20 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import List
 import random
 import re
+from typing import List
 
 from pythainlp.phayathaibert.core import ThaiTextProcessor
 
-
 _MODEL_NAME = "clicknext/phayathaibert"
 
 
 class ThaiTextAugmenter:
     def __init__(self) -> None:
         from transformers import (
-            AutoTokenizer,
             AutoModelForMaskedLM,
+            AutoTokenizer,
             pipeline,
         )
 
diff --git a/pythainlp/augment/word2vec/__init__.py b/pythainlp/augment/word2vec/__init__.py
index ddfb20721..9c7491034 100644
--- a/pythainlp/augment/word2vec/__init__.py
+++ b/pythainlp/augment/word2vec/__init__.py
@@ -8,5 +8,5 @@
 __all__ = ["Word2VecAug", "Thai2fitAug", "LTW2VAug"]
 
 from pythainlp.augment.word2vec.core import Word2VecAug
-from pythainlp.augment.word2vec.thai2fit import Thai2fitAug
 from pythainlp.augment.word2vec.ltw2v import LTW2VAug
+from pythainlp.augment.word2vec.thai2fit import Thai2fitAug
diff --git a/pythainlp/augment/word2vec/bpemb_wv.py b/pythainlp/augment/word2vec/bpemb_wv.py
index e0a13029d..b643a5b54 100644
--- a/pythainlp/augment/word2vec/bpemb_wv.py
+++ b/pythainlp/augment/word2vec/bpemb_wv.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple
+
 from pythainlp.augment.word2vec.core import Word2VecAug
 
 
diff --git a/pythainlp/augment/word2vec/core.py b/pythainlp/augment/word2vec/core.py
index 9f0a4fa1f..0416bf621 100644
--- a/pythainlp/augment/word2vec/core.py
+++ b/pythainlp/augment/word2vec/core.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
-from typing import List, Tuple
 import itertools
+from typing import List, Tuple
 
 
 class Word2VecAug:
diff --git a/pythainlp/augment/word2vec/ltw2v.py b/pythainlp/augment/word2vec/ltw2v.py
index bc2027b84..f8b28fde7 100644
--- a/pythainlp/augment/word2vec/ltw2v.py
+++ b/pythainlp/augment/word2vec/ltw2v.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple
+
 from pythainlp.augment.word2vec.core import Word2VecAug
 from pythainlp.corpus import get_corpus_path
 from pythainlp.tokenize import word_tokenize
diff --git a/pythainlp/augment/word2vec/thai2fit.py b/pythainlp/augment/word2vec/thai2fit.py
index e7f61eb54..8257a3eb3 100644
--- a/pythainlp/augment/word2vec/thai2fit.py
+++ b/pythainlp/augment/word2vec/thai2fit.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple
+
 from pythainlp.augment.word2vec.core import Word2VecAug
 from pythainlp.corpus import get_corpus_path
 from pythainlp.tokenize import THAI2FIT_TOKENIZER
diff --git a/pythainlp/augment/wordnet.py b/pythainlp/augment/wordnet.py
index 4112ec7b9..a545d099e 100644
--- a/pythainlp/augment/wordnet.py
+++ b/pythainlp/augment/wordnet.py
@@ -9,15 +9,15 @@
     "postype2wordnet",
 ]
 
-from collections import OrderedDict
 import itertools
+from collections import OrderedDict
 from typing import List
 
 from nltk.corpus import wordnet as wn
+
 from pythainlp.corpus import wordnet
-from pythainlp.tokenize import word_tokenize
 from pythainlp.tag import pos_tag
-
+from pythainlp.tokenize import word_tokenize
 
 orchid = {
     "": "",
diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py
index e416bf72c..f7f02024a 100644
--- a/pythainlp/classify/param_free.py
+++ b/pythainlp/classify/param_free.py
@@ -3,9 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import gzip
+import json
 from typing import List, Tuple
+
 import numpy as np
-import json
 
 
 class GzipModel:
diff --git a/pythainlp/cli/__init__.py b/pythainlp/cli/__init__.py
index 983a28d62..4bacaddf4 100644
--- a/pythainlp/cli/__init__.py
+++ b/pythainlp/cli/__init__.py
@@ -2,11 +2,10 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 """Command line helpers."""
+
 import sys
 from argparse import ArgumentParser
 
-from pythainlp.cli import data, soundex, tag, tokenize, benchmark
-
 # a command should start with a verb when possible
 COMMANDS = sorted(["data", "soundex", "tag", "tokenize", "benchmark"])
 
diff --git a/pythainlp/cli/benchmark.py b/pythainlp/cli/benchmark.py
index f7b28bbfb..721eb3559 100644
--- a/pythainlp/cli/benchmark.py
+++ b/pythainlp/cli/benchmark.py
@@ -8,6 +8,7 @@
 import os
 
 import yaml
+
 from pythainlp import cli
 from pythainlp.benchmarks import word_tokenization
 
diff --git a/pythainlp/coref/__init__.py b/pythainlp/coref/__init__.py
index 2a2bac107..e785ac38f 100644
--- a/pythainlp/coref/__init__.py
+++ b/pythainlp/coref/__init__.py
@@ -5,4 +5,5 @@
 PyThaiNLP Coreference Resolution
 """
 __all__ = ["coreference_resolution"]
+
 from pythainlp.coref.core import coreference_resolution
diff --git a/pythainlp/coref/_fastcoref.py b/pythainlp/coref/_fastcoref.py
index 368ce7b8c..f8eea9d83 100644
--- a/pythainlp/coref/_fastcoref.py
+++ b/pythainlp/coref/_fastcoref.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
+
 import spacy
 
 
diff --git a/pythainlp/coref/core.py b/pythainlp/coref/core.py
index 4ca9f4029..ac00fe9ef 100644
--- a/pythainlp/coref/core.py
+++ b/pythainlp/coref/core.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
 
-model = None
+_MODEL = None
 
 
 def coreference_resolution(
@@ -40,17 +40,17 @@ def coreference_resolution(
         # 'clusters': [[(0, 10), (50, 52)]]}
         # ]
     """
-    global model
+    global _MODEL
     if isinstance(texts, str):
         texts = [texts]
 
-    if model is None and model_name == "han-coref-v1.0":
+    if _MODEL is None and model_name == "han-coref-v1.0":
         from pythainlp.coref.han_coref import HanCoref
 
-        model = HanCoref(device=device)
+        _MODEL = HanCoref(device=device)
 
-    if model:
-        return model.predict(texts)
+    if _MODEL:
+        return _MODEL.predict(texts)
 
     return [
         {"text": text, "clusters_string": [], "clusters": []} for text in texts
diff --git a/pythainlp/coref/han_coref.py b/pythainlp/coref/han_coref.py
index dc3368d7f..efd5ceb59 100644
--- a/pythainlp/coref/han_coref.py
+++ b/pythainlp/coref/han_coref.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 import spacy
+
 from pythainlp.coref._fastcoref import FastCoref
 
 
diff --git a/pythainlp/corpus/common.py b/pythainlp/corpus/common.py
index 57bd5351f..4a9ebe638 100644
--- a/pythainlp/corpus/common.py
+++ b/pythainlp/corpus/common.py
@@ -23,8 +23,8 @@
     "thai_wsd_dict",
 ]
 
-from typing import FrozenSet, List, Union
 import warnings
+from typing import FrozenSet, List, Union
 
 from pythainlp.corpus import get_corpus, get_corpus_as_is, get_corpus_path
 
diff --git a/pythainlp/corpus/core.py b/pythainlp/corpus/core.py
index 660013b7a..44cb49101 100644
--- a/pythainlp/corpus/core.py
+++ b/pythainlp/corpus/core.py
@@ -4,14 +4,13 @@
 """
 Corpus related functions.
 """
+import json
 import os
 from typing import Union
-import json
 
+from pythainlp import __version__
 from pythainlp.corpus import corpus_db_path, corpus_db_url, corpus_path
 from pythainlp.tools import get_full_data_path
-from pythainlp import __version__
-
 
 _CHECK_MODE = os.getenv("PYTHAINLP_READ_MODE")
 
@@ -293,9 +292,10 @@ def _download(url: str, dst: str) -> int:
     """
     _CHUNK_SIZE = 64 * 1024  # 64 KiB
 
-    import requests
     from urllib.request import urlopen
 
+    import requests
+
     file_size = int(urlopen(url).info().get("Content-Length", -1))
     r = requests.get(url, stream=True)
     with open(get_full_data_path(dst), "wb") as f:
diff --git a/pythainlp/corpus/icu.py b/pythainlp/corpus/icu.py
index 352b3696a..7adec1a39 100644
--- a/pythainlp/corpus/icu.py
+++ b/pythainlp/corpus/icu.py
@@ -8,7 +8,6 @@
 
 from pythainlp.corpus.common import get_corpus
 
-
 _THAI_ICU_FILENAME = "icubrk_th.txt"
 
 
diff --git a/pythainlp/corpus/tnc.py b/pythainlp/corpus/tnc.py
index 0c250b141..b8ac3ed05 100644
--- a/pythainlp/corpus/tnc.py
+++ b/pythainlp/corpus/tnc.py
@@ -15,9 +15,7 @@
 from collections import defaultdict
 from typing import List, Tuple
 
-from pythainlp.corpus import get_corpus
-from pythainlp.corpus import get_corpus_path
-
+from pythainlp.corpus import get_corpus, get_corpus_path
 
 _FILENAME = "tnc_freq.txt"
 _BIGRAM = "tnc_bigram_word_freqs"
diff --git a/pythainlp/el/_multiel.py b/pythainlp/el/_multiel.py
index 90fd51967..698852232 100644
--- a/pythainlp/el/_multiel.py
+++ b/pythainlp/el/_multiel.py
@@ -8,14 +8,16 @@ def __init__(self, model_name="bela", device="cuda"):
         self.model_name = model_name
         self.device = device
         self.load_model()
+
     def load_model(self):
         try:
             from multiel import BELA
-        except ImportError:
+        except ImportError as exc:
             raise ImportError(
                 "Can't import multiel package, you can install by pip install multiel."
-            )
+            ) from exc
         self._bela_run = BELA(device=self.device)
+
     def process_batch(self, list_text):
         if isinstance(list_text, str):
             list_text = [list_text]
diff --git a/pythainlp/generate/__init__.py b/pythainlp/generate/__init__.py
index d7e4b8ca9..8d3664098 100644
--- a/pythainlp/generate/__init__.py
+++ b/pythainlp/generate/__init__.py
@@ -5,6 +5,6 @@
 Thai Text Generation
 """
 
-__all__ = ["Unigram", "Bigram", "Trigram"]
+__all__ = ["Bigram", "Trigram", "Unigram"]
 
-from pythainlp.generate.core import Unigram, Bigram, Trigram
+from pythainlp.generate.core import Bigram, Trigram, Unigram
diff --git a/pythainlp/generate/core.py b/pythainlp/generate/core.py
index 0d589291e..ce43c33ec 100644
--- a/pythainlp/generate/core.py
+++ b/pythainlp/generate/core.py
@@ -9,13 +9,14 @@
 """
 import random
 from typing import List, Union
-from pythainlp.corpus.tnc import unigram_word_freqs as tnc_word_freqs_unigram
-from pythainlp.corpus.tnc import bigram_word_freqs as tnc_word_freqs_bigram
-from pythainlp.corpus.tnc import trigram_word_freqs as tnc_word_freqs_trigram
-from pythainlp.corpus.ttc import unigram_word_freqs as ttc_word_freqs_unigram
+
 from pythainlp.corpus.oscar import (
     unigram_word_freqs as oscar_word_freqs_unigram,
 )
+from pythainlp.corpus.tnc import bigram_word_freqs as tnc_word_freqs_bigram
+from pythainlp.corpus.tnc import trigram_word_freqs as tnc_word_freqs_trigram
+from pythainlp.corpus.tnc import unigram_word_freqs as tnc_word_freqs_unigram
+from pythainlp.corpus.ttc import unigram_word_freqs as ttc_word_freqs_unigram
 
 
 class Unigram:
diff --git a/pythainlp/generate/thai2fit.py b/pythainlp/generate/thai2fit.py
index 240c0b28b..ed0337ac6 100644
--- a/pythainlp/generate/thai2fit.py
+++ b/pythainlp/generate/thai2fit.py
@@ -9,17 +9,22 @@
 """
 __all__ = ["gen_sentence"]
 
-import random
 import pickle
+import random
 from typing import List, Union
-import pandas as pd
 
 # fastai
 import fastai
+import pandas as pd
 from fastai.text import *
 
 # pythainlp
-from pythainlp.ulmfit import *
+from pythainlp.ulmfit import (
+    THWIKI_LSTM,
+    ThaiTokenizer,
+    post_rules_th,
+    pre_rules_th,
+)
 
 # get dummy data
 imdb = untar_data(URLs.IMDB_SAMPLE)
@@ -78,7 +83,7 @@
 
 def gen_sentence(
     start_seq: str = None,
-    N: int = 4,
+    n: int = 4,
     prob: float = 0.001,
     output_str: bool = True,
 ) -> Union[List[str], str]:
@@ -107,7 +112,7 @@ def gen_sentence(
     if start_seq is None:
         start_seq = random.choice(list(thwiki_itos))
     list_word = learn.predict(
-        start_seq, N, temperature=0.8, min_p=prob, sep="-*-"
+        start_seq, n, temperature=0.8, min_p=prob, sep="-*-"
     ).split("-*-")
     if output_str:
         return "".join(list_word)
diff --git a/pythainlp/generate/wangchanglm.py b/pythainlp/generate/wangchanglm.py
index e99fcd2e7..a573b73d0 100644
--- a/pythainlp/generate/wangchanglm.py
+++ b/pythainlp/generate/wangchanglm.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 import re
+
 import torch
 
 
diff --git a/pythainlp/morpheme/__init__.py b/pythainlp/morpheme/__init__.py
index 94eb54822..3d1bcb85a 100644
--- a/pythainlp/morpheme/__init__.py
+++ b/pythainlp/morpheme/__init__.py
@@ -9,5 +9,5 @@
     "nighit",
     "is_native_thai"
 ]
-from pythainlp.morpheme.word_formation import nighit
 from pythainlp.morpheme.thaiwordcheck import is_native_thai
+from pythainlp.morpheme.word_formation import nighit
diff --git a/pythainlp/parse/core.py b/pythainlp/parse/core.py
index 137b86381..cdae989af 100644
--- a/pythainlp/parse/core.py
+++ b/pythainlp/parse/core.py
@@ -4,7 +4,6 @@
 
 from typing import List, Union
 
-
 _tagger = None
 _tagger_name = ""
 
diff --git a/pythainlp/phayathaibert/core.py b/pythainlp/phayathaibert/core.py
index f3563a45e..a70973c40 100644
--- a/pythainlp/phayathaibert/core.py
+++ b/pythainlp/phayathaibert/core.py
@@ -2,16 +2,16 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Callable, List, Tuple, Union
 import random
 import re
 import warnings
+from typing import Callable, List, Tuple, Union
 
-from pythainlp.tokenize import word_tokenize
 from transformers import (
     CamembertTokenizer,
 )
 
+from pythainlp.tokenize import word_tokenize
 
 _PAT_URL = r"(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
 
@@ -305,8 +305,8 @@ class PartOfSpeechTagger:
     def __init__(self, model: str = "lunarlist/pos_thai_phayathai") -> None:
         # Load model directly
         from transformers import (
-            AutoTokenizer,
             AutoModelForTokenClassification,
+            AutoTokenizer,
         )
 
         self.tokenizer = AutoTokenizer.from_pretrained(model)
@@ -349,8 +349,8 @@ def get_tag(
 class NamedEntityTagger:
     def __init__(self, model: str = "Pavarissy/phayathaibert-thainer") -> None:
         from transformers import (
-            AutoTokenizer,
             AutoModelForTokenClassification,
+            AutoTokenizer,
         )
 
         self.tokenizer = AutoTokenizer.from_pretrained(model)
diff --git a/pythainlp/soundex/core.py b/pythainlp/soundex/core.py
index 52a8bc5d2..f436b2ad7 100644
--- a/pythainlp/soundex/core.py
+++ b/pythainlp/soundex/core.py
@@ -6,11 +6,11 @@
 
 Has three systems to choose from: Udom83 (default), LK82, and MetaSound
 """
+from pythainlp.soundex import DEFAULT_SOUNDEX_ENGINE
 from pythainlp.soundex.lk82 import lk82
 from pythainlp.soundex.metasound import metasound
-from pythainlp.soundex.udom83 import udom83
 from pythainlp.soundex.prayut_and_somchaip import prayut_and_somchaip
-from pythainlp.soundex import DEFAULT_SOUNDEX_ENGINE
+from pythainlp.soundex.udom83 import udom83
 
 # Other Thai soundex systems (not implemented yet): Arun91, KSS97
 # [KSS97] https://linux.thai.net/~thep/soundex/soundex.html
diff --git a/pythainlp/soundex/sound.py b/pythainlp/soundex/sound.py
index e5e9a62eb..1b86a4e1c 100644
--- a/pythainlp/soundex/sound.py
+++ b/pythainlp/soundex/sound.py
@@ -2,10 +2,12 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
+
 import panphon
 import panphon.distance
-from pythainlp.transliterate import pronunciate, transliterate
+
 from pythainlp.tokenize import word_tokenize
+from pythainlp.transliterate import pronunciate, transliterate
 
 _ft = panphon.FeatureTable()
 _dst = panphon.distance.Distance()
diff --git a/pythainlp/spell/__init__.py b/pythainlp/spell/__init__.py
index 351379f7a..0889f5e3e 100644
--- a/pythainlp/spell/__init__.py
+++ b/pythainlp/spell/__init__.py
@@ -7,15 +7,16 @@
 
 __all__ = [
     "DEFAULT_SPELL_CHECKER",
+    "NorvigSpellChecker",
     "correct",
+    "correct_sent",
     "spell",
-    "NorvigSpellChecker",
     "spell_sent",
-    "correct_sent",
 ]
 
 from pythainlp.spell.pn import NorvigSpellChecker
 
 DEFAULT_SPELL_CHECKER = NorvigSpellChecker()
 
-from pythainlp.spell.core import correct, spell, correct_sent, spell_sent
+# these imports are placed here to avoid circular imports
+from pythainlp.spell.core import correct, correct_sent, spell, spell_sent
diff --git a/pythainlp/spell/phunspell.py b/pythainlp/spell/phunspell.py
index 028569ea2..86117df8e 100644
--- a/pythainlp/spell/phunspell.py
+++ b/pythainlp/spell/phunspell.py
@@ -11,6 +11,7 @@
         https://github.com/dvwright/phunspell
 """
 from typing import List
+
 import phunspell
 
 pspell = phunspell.Phunspell("th_TH")
diff --git a/pythainlp/spell/symspellpy.py b/pythainlp/spell/symspellpy.py
index d9fb72b00..469dfeae0 100644
--- a/pythainlp/spell/symspellpy.py
+++ b/pythainlp/spell/symspellpy.py
@@ -12,9 +12,10 @@
         https://github.com/mammothb/symspellpy
 """
 from typing import List
+
 from symspellpy import SymSpell, Verbosity
-from pythainlp.corpus import get_corpus_path
-from pythainlp.corpus import path_pythainlp_corpus
+
+from pythainlp.corpus import get_corpus_path, path_pythainlp_corpus
 
 _UNIGRAM = "tnc_freq.txt"
 _BIGRAM = "tnc_bigram_word_freqs"
diff --git a/pythainlp/spell/wanchanberta_thai_grammarly.py b/pythainlp/spell/wanchanberta_thai_grammarly.py
index ecf9d7856..3d6af1ee6 100644
--- a/pythainlp/spell/wanchanberta_thai_grammarly.py
+++ b/pythainlp/spell/wanchanberta_thai_grammarly.py
@@ -10,9 +10,12 @@
     * GitHub: \
         https://github.com/bookpanda/Two-stage-Thai-Misspelling-Correction-Based-on-Pre-trained-Language-Models
 """
-from transformers import AutoModelForMaskedLM
-from transformers import AutoTokenizer, BertForTokenClassification
 import torch
+from transformers import (
+    AutoModelForMaskedLM,
+    AutoTokenizer,
+    BertForTokenClassification,
+)
 
 use_cuda = torch.cuda.is_available()
 device = torch.device("cuda" if use_cuda else "cpu")
diff --git a/pythainlp/summarize/__init__.py b/pythainlp/summarize/__init__.py
index b47b276c8..cddaa9390 100644
--- a/pythainlp/summarize/__init__.py
+++ b/pythainlp/summarize/__init__.py
@@ -5,10 +5,14 @@
 Text summarization
 """
 
-__all__ = ["summarize", "extract_keywords"]
+__all__ = [
+    "extract_keywords",
+    "summarize",
+]
 
 DEFAULT_SUMMARIZE_ENGINE = "frequency"
 CPE_KMUTT_THAI_SENTENCE_SUM = "mt5-cpe-kmutt-thai-sentence-sum"
 DEFAULT_KEYWORD_EXTRACTION_ENGINE = "keybert"
 
+# these imports are placed here to avoid circular imports
 from pythainlp.summarize.core import extract_keywords, summarize
diff --git a/pythainlp/summarize/keybert.py b/pythainlp/summarize/keybert.py
index 766c22525..f71af840d 100644
--- a/pythainlp/summarize/keybert.py
+++ b/pythainlp/summarize/keybert.py
@@ -10,8 +10,8 @@
 
 https://github.com/MaartenGr/KeyBERT
 """
-from typing import List, Optional, Iterable, Tuple, Union
 from collections import Counter
+from typing import Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 from transformers import pipeline
diff --git a/pythainlp/tag/crfchunk.py b/pythainlp/tag/crfchunk.py
index 239bdf254..5eb5b68f6 100644
--- a/pythainlp/tag/crfchunk.py
+++ b/pythainlp/tag/crfchunk.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import Dict, List, Tuple
+
 from pycrfsuite import Tagger as CRFTagger
+
 from pythainlp.corpus import path_pythainlp_corpus, thai_stopwords
 
 
diff --git a/pythainlp/tag/thai_nner.py b/pythainlp/tag/thai_nner.py
index 0e69b3075..704e6352a 100644
--- a/pythainlp/tag/thai_nner.py
+++ b/pythainlp/tag/thai_nner.py
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple
+
 from thai_nner import NNER
+
 from pythainlp.corpus import get_corpus_path
 
 
diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index 9c9530822..f9ed0f517 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple, Union
+
 try:
     from tltk import nlp
 except ImportError:
diff --git a/pythainlp/tag/wangchanberta_onnx.py b/pythainlp/tag/wangchanberta_onnx.py
index c394a5e40..75ee85307 100644
--- a/pythainlp/tag/wangchanberta_onnx.py
+++ b/pythainlp/tag/wangchanberta_onnx.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
-from typing import List
 import json
+from typing import List
 
 import numpy as np
+
 from pythainlp.corpus import get_path_folder_corpus
 
 
@@ -18,9 +19,9 @@ def __init__(
     ) -> None:
         import sentencepiece as spm
         from onnxruntime import (
+            GraphOptimizationLevel,
             InferenceSession,
             SessionOptions,
-            GraphOptimizationLevel,
         )
 
         self.model_name = model_name
diff --git a/pythainlp/tokenize/_utils.py b/pythainlp/tokenize/_utils.py
index 6731c80e0..d3dcb4046 100644
--- a/pythainlp/tokenize/_utils.py
+++ b/pythainlp/tokenize/_utils.py
@@ -6,7 +6,7 @@
 """
 
 import re
-from typing import List, Callable
+from typing import Callable, List
 
 _DIGITS_WITH_SEPARATOR = re.compile(r"(\d+[\.\,:])+\d+")
 
diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py
index 57669337d..a4c1ebbe1 100644
--- a/pythainlp/tokenize/core.py
+++ b/pythainlp/tokenize/core.py
@@ -4,9 +4,10 @@
 """
 Generic functions of tokenizers
 """
+
+import copy
 import re
 from typing import Iterable, List, Union
-import copy
 
 from pythainlp.tokenize import (
     DEFAULT_SENT_TOKENIZE_ENGINE,
@@ -37,7 +38,21 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
     ::
 
         from pythainlp.tokenize import clause_tokenize
-        clause_tokenize(["ฉัน","นอน","และ","คุณ","เล่น","มือถือ","ส่วน","น้อง","เขียน","โปรแกรม"])
+
+        clause_tokenize(
+            [
+                "ฉัน",
+                "นอน",
+                "และ",
+                "คุณ",
+                "เล่น",
+                "มือถือ",
+                "ส่วน",
+                "น้อง",
+                "เขียน",
+                "โปรแกรม",
+            ]
+        )
         # [['ฉัน', 'นอน'],
         # ['และ', 'คุณ', 'เล่น', 'มือถือ'],
         # ['ส่วน', 'น้อง', 'เขียน', 'โปรแกรม']]
@@ -63,6 +78,7 @@ def word_detokenize(
     ::
 
         from pythainlp.tokenize import word_detokenize
+
         print(word_detokenize(["เรา", "เล่น"]))
         # output: เราเล่น
     """
@@ -291,18 +307,19 @@ def word_tokenize(
         segments = segment(text)
     elif engine == "nlpo3":
         from pythainlp.tokenize.nlpo3 import segment
+
         # Currently cannot handle custom_dict from inside word_tokenize(),
         # due to difference in type.
-        #if isinstance(custom_dict, str):
+        # if isinstance(custom_dict, str):
         #    segments = segment(text, custom_dict=custom_dict)
-        #elif not isinstance(custom_dict, str) and not custom_dict:
+        # elif not isinstance(custom_dict, str) and not custom_dict:
         #    raise ValueError(
         #        f"""Tokenizer \"{engine}\":
         #        custom_dict must be a str.
         #        It is a dictionary name as assigned with load_dict().
         #        See pythainlp.tokenize.nlpo3.load_dict()"""
         #    )
-        #else:
+        # else:
         #    segments = segment(text)
         segments = segment(text)
     else:
@@ -346,7 +363,7 @@ def map_indices_to_words(index_list, sentences):
             if start > n_sum + len(words) - 1:
                 break
             else:
-                word = sentence[start - n_sum:end + 1 - n_sum]
+                word = sentence[start - n_sum : end + 1 - n_sum]
                 sentence_result.append(word)
                 n += 1
 
@@ -356,6 +373,7 @@ def map_indices_to_words(index_list, sentences):
             del c[0]
     return result
 
+
 def sent_tokenize(
     text: Union[str, List[str]],
     engine: str = DEFAULT_SENT_TOKENIZE_ENGINE,
@@ -434,7 +452,6 @@ def sent_tokenize(
     is_list_input = isinstance(text, list)
 
     if is_list_input:
-
         try:
             original_text = "".join(text)
         except ValueError:
@@ -458,10 +475,10 @@ def sent_tokenize(
         segments = re.split(r" +", original_text, flags=re.U)
         if is_list_input:
             result = []
-            _temp = []
+            _temp: list[str] = []
             for i, w in enumerate(text):
                 if re.findall(r" ", w) != [] and re.findall(r"\w", w) == []:
-                    if _temp == []:
+                    if not _temp:
                         continue
                     result.append(_temp)
                     _temp = []
@@ -477,11 +494,9 @@ def sent_tokenize(
             _temp = []
             for i, w in enumerate(text):
                 if (
-                    (re.findall(r"\s", w) != [] or
-                        re.findall(r"\n", w) != []) and
-                        re.findall(r"\w", w) == []
-                ):
-                    if _temp == []:
+                    re.findall(r"\s", w) != [] or re.findall(r"\n", w) != []
+                ) and re.findall(r"\w", w) == []:
+                    if not _temp:
                         continue
                     result.append(_temp)
                     _temp = []
@@ -492,11 +507,13 @@ def sent_tokenize(
             return result
     elif engine == "tltk":
         from pythainlp.tokenize.tltk import sent_tokenize as segment
+
         segments = segment(original_text)
     elif engine == "thaisum":
         from pythainlp.tokenize.thaisumcut import (
             ThaiSentenceSegmentor as segmentor,
         )
+
         segment = segmentor()
         segments = segment.split_into_sentences(original_text)
     elif engine.startswith("wtp"):
@@ -505,6 +522,7 @@ def sent_tokenize(
         else:
             _size = engine.split("-")[-1]
         from pythainlp.tokenize.wtsplit import tokenize as segment
+
         segments = segment(original_text, size=_size, tokenize="sentence")
     else:
         raise ValueError(
diff --git a/pythainlp/tokenize/crfcls.py b/pythainlp/tokenize/crfcls.py
index 8adfa6f01..00b4ad041 100644
--- a/pythainlp/tokenize/crfcls.py
+++ b/pythainlp/tokenize/crfcls.py
@@ -7,8 +7,9 @@
 from typing import List
 
 import pycrfsuite
-from pythainlp.tag import pos_tag
+
 from pythainlp.corpus import path_pythainlp_corpus
+from pythainlp.tag import pos_tag
 
 
 def _doc2features(doc, i):
diff --git a/pythainlp/tokenize/crfcut.py b/pythainlp/tokenize/crfcut.py
index 75b2a7c6a..38cbe799a 100644
--- a/pythainlp/tokenize/crfcut.py
+++ b/pythainlp/tokenize/crfcut.py
@@ -20,6 +20,7 @@
 from typing import List
 
 import pycrfsuite
+
 from pythainlp.corpus import corpus_path
 from pythainlp.tokenize import word_tokenize
 
diff --git a/pythainlp/tokenize/han_solo.py b/pythainlp/tokenize/han_solo.py
index 8b876597e..3a7a9c158 100644
--- a/pythainlp/tokenize/han_solo.py
+++ b/pythainlp/tokenize/han_solo.py
@@ -8,6 +8,7 @@
 GitHub: https://github.com/PyThaiNLP/Han-solo
 """
 from typing import List
+
 from pythainlp.corpus import path_pythainlp_corpus
 
 try:
diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py
index 504ed50be..43299c7aa 100644
--- a/pythainlp/tokenize/newmm.py
+++ b/pythainlp/tokenize/newmm.py
@@ -20,9 +20,8 @@
 from typing import Generator, List
 
 from pythainlp.tokenize import DEFAULT_WORD_DICT_TRIE
-from pythainlp.util import Trie
-
 from pythainlp.tokenize.tcc_p import tcc_pos
+from pythainlp.util import Trie
 
 # match non-Thai tokens
 # `|` is used as like "early return",
diff --git a/pythainlp/tokenize/nlpo3.py b/pythainlp/tokenize/nlpo3.py
index 839ca5bd5..d8a026d80 100644
--- a/pythainlp/tokenize/nlpo3.py
+++ b/pythainlp/tokenize/nlpo3.py
@@ -4,10 +4,11 @@
 from sys import stderr
 from typing import List
 
-from nlpo3 import segment as nlpo3_segment
 from nlpo3 import load_dict as nlpo3_load_dict
-from pythainlp.corpus.common import _THAI_WORDS_FILENAME
+from nlpo3 import segment as nlpo3_segment
+
 from pythainlp.corpus import path_pythainlp_corpus
+from pythainlp.corpus.common import _THAI_WORDS_FILENAME
 
 _NLPO3_DEFAULT_DICT_NAME = "_67a47bf9"
 _NLPO3_DEFAULT_DICT = nlpo3_load_dict(
diff --git a/pythainlp/tokenize/thaisumcut.py b/pythainlp/tokenize/thaisumcut.py
index 2ab8152a3..8320b1992 100644
--- a/pythainlp/tokenize/thaisumcut.py
+++ b/pythainlp/tokenize/thaisumcut.py
@@ -15,10 +15,11 @@
     school={Beijing Institute of Technology}
 """
 
-import re
-import operator
 import math
+import operator
+import re
 from typing import List
+
 from pythainlp.tokenize import word_tokenize
 
 
diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
index a8a6b0c56..1c03ed100 100644
--- a/pythainlp/tokenize/tltk.py
+++ b/pythainlp/tokenize/tltk.py
@@ -2,9 +2,10 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
+
 try:
-    from tltk.nlp import word_segment as tltk_segment
     from tltk.nlp import syl_segment
+    from tltk.nlp import word_segment as tltk_segment
 except ImportError:
     raise ImportError("Not found tltk! Please install tltk by pip install tltk")
 
diff --git a/pythainlp/tokenize/wtsplit.py b/pythainlp/tokenize/wtsplit.py
index 027f02eb6..e6ca15b6d 100644
--- a/pythainlp/tokenize/wtsplit.py
+++ b/pythainlp/tokenize/wtsplit.py
@@ -7,6 +7,7 @@
 GitHub: https://github.com/bminixhofer/wtpsplit
 """
 from typing import List
+
 from wtpsplit import WtP
 
 _MODEL = None
diff --git a/pythainlp/tools/misspell.py b/pythainlp/tools/misspell.py
index e9c5b3061..497e671f5 100644
--- a/pythainlp/tools/misspell.py
+++ b/pythainlp/tools/misspell.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
 from typing import List
+
 import numpy as np
 
 THAI_CHARACTERS_WITHOUT_SHIFT = [
diff --git a/pythainlp/translate/__init__.py b/pythainlp/translate/__init__.py
index 10f011eda..7effc17cb 100644
--- a/pythainlp/translate/__init__.py
+++ b/pythainlp/translate/__init__.py
@@ -8,7 +8,6 @@
 __all__ = ["ThZhTranslator", "ZhThTranslator", "Translate"]
 
 from pythainlp.translate.core import Translate
-
 from pythainlp.translate.zh_th import (
     ThZhTranslator,
     ZhThTranslator,
diff --git a/pythainlp/translate/en_th.py b/pythainlp/translate/en_th.py
index 2074c2112..75614cb0f 100644
--- a/pythainlp/translate/en_th.py
+++ b/pythainlp/translate/en_th.py
@@ -15,7 +15,6 @@
 
 from pythainlp.corpus import download, get_corpus_path
 
-
 _EN_TH_MODEL_NAME = "scb_1m_en-th_moses"
 # SCB_1M-MT_OPUS+TBASE_en-th_moses-spm_130000-16000_v1.0.tar.gz
 _EN_TH_FILE_NAME = "SCB_1M-MT_OPUS+TBASE_en-th_moses-spm_130000-16000_v1.0"
diff --git a/pythainlp/translate/small100.py b/pythainlp/translate/small100.py
index 05c9b1e36..1d2bc9975 100644
--- a/pythainlp/translate/small100.py
+++ b/pythainlp/translate/small100.py
@@ -1,6 +1,8 @@
 from transformers import M2M100ForConditionalGeneration
+
 from .tokenization_small100 import SMALL100Tokenizer
 
+
 class Small100Translator:
     """
     Machine Translation using small100 model
diff --git a/pythainlp/translate/th_fr.py b/pythainlp/translate/th_fr.py
index e9a97aa21..fd48eeb96 100644
--- a/pythainlp/translate/th_fr.py
+++ b/pythainlp/translate/th_fr.py
@@ -12,7 +12,7 @@
 
 - Huggingface https://huggingface.co/Helsinki-NLP/opus-mt-th-fr
 """
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
 class ThFrTranslator:
diff --git a/pythainlp/translate/tokenization_small100.py b/pythainlp/translate/tokenization_small100.py
index 9bb21cfe7..300b16bc8 100644
--- a/pythainlp/translate/tokenization_small100.py
+++ b/pythainlp/translate/tokenization_small100.py
@@ -23,10 +23,8 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import sentencepiece
-
 from transformers.tokenization_utils import BatchEncoding, PreTrainedTokenizer
 
-
 SPIECE_UNDERLINE = "▁"
 
 VOCAB_FILES_NAMES = {
diff --git a/pythainlp/translate/zh_th.py b/pythainlp/translate/zh_th.py
index daacd7c07..ec026cc7e 100644
--- a/pythainlp/translate/zh_th.py
+++ b/pythainlp/translate/zh_th.py
@@ -9,7 +9,7 @@
 - GitHub: https://github.com/LalitaDeelert/lalita-mt-zhth
 - Facebook post https://web.facebook.com/aibuildersx/posts/166736255494822
 """
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
 class ThZhTranslator:
diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py
index bf5789d3d..ef4859f79 100644
--- a/pythainlp/transliterate/__init__.py
+++ b/pythainlp/transliterate/__init__.py
@@ -5,7 +5,12 @@
 Transliteration.
 """
 
-__all__ = ["romanize", "transliterate", "pronunciate", "puan"]
+__all__ = [
+    "pronunciate",
+    "puan",
+    "romanize",
+    "transliterate",
+]
 
-from pythainlp.transliterate.core import romanize, transliterate, pronunciate
+from pythainlp.transliterate.core import pronunciate, romanize, transliterate
 from pythainlp.transliterate.spoonerism import puan
diff --git a/pythainlp/transliterate/lookup.py b/pythainlp/transliterate/lookup.py
index 7d8082103..2414695a6 100644
--- a/pythainlp/transliterate/lookup.py
+++ b/pythainlp/transliterate/lookup.py
@@ -10,13 +10,13 @@
 """
 
 from typing import Callable, Optional
+
 from pythainlp.corpus.th_en_translit import (
     TRANSLITERATE_DICT,
     TRANSLITERATE_EN,
     TRANSLITERATE_FOLLOW_RTSG,
 )
 
-
 _TRANSLITERATE_IDX = 0
 
 
diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py
index de994941b..25f450765 100644
--- a/pythainlp/transliterate/spoonerism.py
+++ b/pythainlp/transliterate/spoonerism.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
-from pythainlp.transliterate import pronunciate
 from pythainlp import thai_consonants
+from pythainlp.transliterate import pronunciate
 
 _list_consonants = list(thai_consonants.replace("ห", ""))
 
diff --git a/pythainlp/transliterate/thai2rom.py b/pythainlp/transliterate/thai2rom.py
index f3fea086a..e6f7d5395 100644
--- a/pythainlp/transliterate/thai2rom.py
+++ b/pythainlp/transliterate/thai2rom.py
@@ -7,8 +7,9 @@
 import random
 
 import torch
-from torch import nn
 import torch.nn.functional as F
+from torch import nn
+
 from pythainlp.corpus import get_corpus_path
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
diff --git a/pythainlp/transliterate/thai2rom_onnx.py b/pythainlp/transliterate/thai2rom_onnx.py
index 147457392..6b2296733 100644
--- a/pythainlp/transliterate/thai2rom_onnx.py
+++ b/pythainlp/transliterate/thai2rom_onnx.py
@@ -5,10 +5,11 @@
 Romanization of Thai words based on machine-learnt engine in ONNX runtime ("thai2rom")
 """
 import json
+
 import numpy as np
 from onnxruntime import InferenceSession
-from pythainlp.corpus import get_corpus_path
 
+from pythainlp.corpus import get_corpus_path
 
 _MODEL_ENCODER_NAME = "thai2rom_encoder_onnx"
 _MODEL_DECODER_NAME = "thai2rom_decoder_onnx"
diff --git a/pythainlp/transliterate/thaig2p.py b/pythainlp/transliterate/thaig2p.py
index 5923b897e..c86d7dd77 100644
--- a/pythainlp/transliterate/thaig2p.py
+++ b/pythainlp/transliterate/thaig2p.py
@@ -10,8 +10,9 @@
 
 import numpy as np
 import torch
-from torch import nn
 import torch.nn.functional as F
+from torch import nn
+
 from pythainlp.corpus import get_corpus_path
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
diff --git a/pythainlp/transliterate/w2p.py b/pythainlp/transliterate/w2p.py
index 96034602f..babfe4d5c 100644
--- a/pythainlp/transliterate/w2p.py
+++ b/pythainlp/transliterate/w2p.py
@@ -9,6 +9,7 @@
 from typing import Union
 
 import numpy as np
+
 from pythainlp.corpus import download, get_corpus_path
 
 _GRAPHEMES = list(
diff --git a/pythainlp/ulmfit/core.py b/pythainlp/ulmfit/core.py
index f1e6d7a91..681ced883 100644
--- a/pythainlp/ulmfit/core.py
+++ b/pythainlp/ulmfit/core.py
@@ -9,6 +9,7 @@
 
 import numpy as np
 import torch
+
 from pythainlp.corpus import get_corpus_path
 from pythainlp.tokenize import THAI2FIT_TOKENIZER
 from pythainlp.ulmfit.preprocess import (
diff --git a/pythainlp/ulmfit/tokenizer.py b/pythainlp/ulmfit/tokenizer.py
index 9801cb0cc..b44001b87 100644
--- a/pythainlp/ulmfit/tokenizer.py
+++ b/pythainlp/ulmfit/tokenizer.py
@@ -6,6 +6,7 @@
 """
 
 from typing import Collection, List
+
 from pythainlp.tokenize import THAI2FIT_TOKENIZER
 
 
diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
index 6807c9d53..5d9eb65fb 100644
--- a/pythainlp/util/__init__.py
+++ b/pythainlp/util/__init__.py
@@ -28,6 +28,7 @@
     "normalize",
     "now_reign_year",
     "num_to_thaiword",
+    "maiyamok",
     "rank",
     "reign_year_to_ad",
     "remove_dangling",
@@ -51,24 +52,26 @@
     "thai_strptime",
     "thai_strftime",
     "thai_to_eng",
-    "to_idna",
     "thai_word_tone_detector",
     "thaiword_to_date",
     "thaiword_to_num",
     "thaiword_to_time",
     "time_to_thaiword",
     "tis620_to_utf8",
+    "to_idna",
     "tone_detector",
     "words_to_num",
 ]
 
+from pythainlp.util import spell_words
+from pythainlp.util.abbreviation import abbreviation_to_full_text
 from pythainlp.util.collate import collate
 from pythainlp.util.date import (
+    convert_years,
     now_reign_year,
     reign_year_to_ad,
-    thaiword_to_date,
-    convert_years,
     thai_strptime,
+    thaiword_to_date,
 )
 from pythainlp.util.digitconv import (
     arabic_digit_to_thai_digit,
@@ -77,16 +80,17 @@
     text_to_thai_digit,
     thai_digit_to_arabic_digit,
 )
+from pythainlp.util.emojiconv import emoji_to_thai
+from pythainlp.util.encoding import tis620_to_utf8, to_idna
 from pythainlp.util.keyboard import (
     eng_to_thai,
     thai_keyboard_dist,
     thai_to_eng,
 )
-from pythainlp.util.emojiconv import emoji_to_thai
 from pythainlp.util.keywords import find_keyword, rank
 from pythainlp.util.normalize import (
-    normalize,
     maiyamok,
+    normalize,
     remove_dangling,
     remove_dup_spaces,
     remove_repeat_vowels,
@@ -94,14 +98,15 @@
     remove_zw,
     reorder_vowels,
 )
+from pythainlp.util.numtoword import bahttext, num_to_thaiword
+from pythainlp.util.phoneme import ipa_to_rtgs, nectec_to_ipa, remove_tone_ipa
 from pythainlp.util.remove_trailing_repeat_consonants import (
     remove_trailing_repeat_consonants,
 )
-from pythainlp.util.numtoword import bahttext, num_to_thaiword
 from pythainlp.util.strftime import thai_strftime
 from pythainlp.util.thai import (
-    countthai,
     count_thai_chars,
+    countthai,
     display_thai_char,
     isthai,
     isthaichar,
@@ -110,15 +115,13 @@
 from pythainlp.util.thaiwordcheck import is_native_thai
 from pythainlp.util.time import thaiword_to_time, time_to_thaiword
 from pythainlp.util.trie import Trie, dict_trie
-from pythainlp.util.wordtonum import thaiword_to_num, text_to_num, words_to_num
+from pythainlp.util.wordtonum import text_to_num, thaiword_to_num, words_to_num
+
+# syllable and pronounce have to be imported last
 from pythainlp.util.syllable import (
     sound_syllable,
-    tone_detector,
     syllable_length,
     syllable_open_close_detector,
+    tone_detector,
 )
-from pythainlp.util.phoneme import nectec_to_ipa, ipa_to_rtgs, remove_tone_ipa
-from pythainlp.util.encoding import to_idna, tis620_to_utf8
-from pythainlp.util import spell_words
-from pythainlp.util.abbreviation import abbreviation_to_full_text
 from pythainlp.util.pronounce import rhyme
diff --git a/pythainlp/util/date.py b/pythainlp/util/date.py
index 0201c4c8c..d2a301e7e 100644
--- a/pythainlp/util/date.py
+++ b/pythainlp/util/date.py
@@ -22,9 +22,9 @@
     "thaiword_to_date",
 ]
 
+import re
 from datetime import datetime, timedelta
 from typing import Union
-import re
 
 try:
     from zoneinfo import ZoneInfo
diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py
index bee233afe..24c7550c4 100644
--- a/pythainlp/util/normalize.py
+++ b/pythainlp/util/normalize.py
@@ -4,6 +4,7 @@
 """
 Text normalization
 """
+
 import re
 from typing import List, Union
 
@@ -14,7 +15,6 @@
 from pythainlp import thai_tonemarks as tonemarks
 from pythainlp.tokenize import word_tokenize
 
-
 _DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
 _RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
 
@@ -76,7 +76,7 @@ def remove_dangling(text: str) -> str:
 
         from pythainlp.util import remove_dangling
 
-        remove_dangling('๊ก')
+        remove_dangling("๊ก")
         # output: 'ก'
     """
     return _RE_REMOVE_DANGLINGS.sub("", text)
@@ -98,7 +98,7 @@ def remove_dup_spaces(text: str) -> str:
 
         from pythainlp.util import remove_dup_spaces
 
-        remove_dup_spaces('ก    ข    ค')
+        remove_dup_spaces("ก    ข    ค")
         # output: 'ก ข ค'
     """
     while "  " in text:
@@ -132,7 +132,7 @@ def remove_tonemark(text: str) -> str:
 
         from pythainlp.util import remove_tonemark
 
-        remove_tonemark('สองพันหนึ่งร้อยสี่สิบเจ็ดล้านสี่แสนแปดหมื่นสามพันหกร้อยสี่สิบเจ็ด')
+        remove_tonemark("สองพันหนึ่งร้อยสี่สิบเจ็ดล้านสี่แสนแปดหมื่นสามพันหกร้อยสี่สิบเจ็ด")
         # output: สองพันหนึงรอยสีสิบเจ็ดลานสีแสนแปดหมืนสามพันหกรอยสีสิบเจ็ด
     """
     for ch in tonemarks:
@@ -235,10 +235,10 @@ def normalize(text: str) -> str:
 
         from pythainlp.util import normalize
 
-        normalize('เเปลก')  # starts with two Sara E
+        normalize("เเปลก")  # starts with two Sara E
         # output: แปลก
 
-        normalize('นานาาา')
+        normalize("นานาาา")
         # output: นานา
     """
     text = remove_zw(text)
@@ -268,7 +268,7 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
         maiyamok("เด็กๆชอบไปโรงเรียน")
         # output: ['เด็ก', 'เด็ก', 'ชอบ', 'ไป', 'โรงเรียน']
 
-        maiyamok(["ทำไม","คน","ดี"," ","ๆ","ๆ"," ","ถึง","ทำ","ไม่ได้"])
+        maiyamok(["ทำไม", "คน", "ดี", " ", "ๆ", "ๆ", " ", "ถึง", "ทำ", "ไม่ได้"])
         # output: ['ทำไม', 'คน', 'ดี', 'ดี', 'ดี', ' ', 'ถึง', 'ทำ', 'ไม่ได้']
     """
     if isinstance(sent, str):
diff --git a/pythainlp/util/phoneme.py b/pythainlp/util/phoneme.py
index 075d43a9d..c7de73731 100644
--- a/pythainlp/util/phoneme.py
+++ b/pythainlp/util/phoneme.py
@@ -5,8 +5,9 @@
 Phonemes util
 """
 import unicodedata
-from pythainlp.util.trie import Trie
+
 from pythainlp.tokenize import Tokenizer
+from pythainlp.util.trie import Trie
 
 consonants_ipa_nectec = [
     ("k", "k", "k^"),
diff --git a/pythainlp/util/pronounce.py b/pythainlp/util/pronounce.py
index db0869b7f..a5a9387a6 100644
--- a/pythainlp/util/pronounce.py
+++ b/pythainlp/util/pronounce.py
@@ -4,9 +4,8 @@
 from typing import List
 
 from pythainlp.corpus import thai_words
-from pythainlp.tokenize import syllable_tokenize
 from pythainlp.khavee import KhaveeVerifier
-
+from pythainlp.tokenize import syllable_tokenize
 
 kv = KhaveeVerifier()
 all_thai_words_dict = None
diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py
index e0770703e..c4b6eafba 100644
--- a/pythainlp/util/remove_trailing_repeat_consonants.py
+++ b/pythainlp/util/remove_trailing_repeat_consonants.py
@@ -4,10 +4,11 @@
 """
 Removement of repeated consonants at the end of words
 """
+from typing import Iterable, List, Tuple
+
+from pythainlp import thai_consonants as consonants
 from pythainlp.corpus import thai_words
 from pythainlp.util.trie import Trie
-from pythainlp import thai_consonants as consonants
-from typing import Iterable, List, Tuple
 
 # used by remove_trailing_repeat_consonants()
 # contains all words that has repeating consonants at the end
diff --git a/pythainlp/util/spell_words.py b/pythainlp/util/spell_words.py
index 86f255dc7..998b6df64 100644
--- a/pythainlp/util/spell_words.py
+++ b/pythainlp/util/spell_words.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import re
 from typing import List
+
 from pythainlp import (
     thai_above_vowels,
     thai_below_vowels,
@@ -12,8 +13,7 @@
     thai_letters,
     thai_tonemarks,
 )
-from pythainlp.tokenize import subword_tokenize, Tokenizer
-
+from pythainlp.tokenize import Tokenizer, subword_tokenize
 
 _r1 = ["เ-ย", "เ-ะ", "แ-ะ", "โ-ะ", "เ-าะ", "เ-อะ", "เ-อ", "เ-า"]
 _r2 = ["–ั:วะ", "เ–ี:ยะ", "เ–ือะ", "–ั:ว", "เ–ี:ย", "เ–ื:อ", "–ื:อ"]
diff --git a/pythainlp/util/syllable.py b/pythainlp/util/syllable.py
index 57f0c68ab..8d134cedd 100644
--- a/pythainlp/util/syllable.py
+++ b/pythainlp/util/syllable.py
@@ -5,6 +5,7 @@
 Syllable tools
 """
 import re
+
 from pythainlp import thai_consonants, thai_tonemarks
 
 spelling_class = {
diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py
index ae95c04e6..f2853db02 100644
--- a/pythainlp/util/thai.py
+++ b/pythainlp/util/thai.py
@@ -4,23 +4,22 @@
 """
 Check if it is Thai text
 """
+
 import string
 from typing import Tuple
 
 from pythainlp import (
-    thai_lead_vowels,
-    thai_follow_vowels,
     thai_above_vowels,
     thai_below_vowels,
     thai_consonants,
-    thai_vowels,
-    thai_tonemarks,
-    thai_signs,
     thai_digits,
+    thai_follow_vowels,
+    thai_lead_vowels,
     thai_punctuations,
+    thai_signs,
+    thai_tonemarks,
+    thai_vowels,
 )
-from pythainlp.transliterate import pronunciate
-from pythainlp.util.syllable import tone_detector
 
 _DEFAULT_IGNORE_CHARS = string.whitespace + string.digits + string.punctuation
 _TH_FIRST_CHAR_ASCII = 3584
@@ -193,6 +192,9 @@ def thai_word_tone_detector(word: str) -> Tuple[str, str]:
         print(thai_word_tone_detector("มือถือ"))
         # output: [('มือ', 'm'), ('ถือ', 'r')]
     """
+    from ..transliterate import pronunciate
+    from ..util.syllable import tone_detector
+
     _pronunciate = pronunciate(word).split("-")
     return [(i, tone_detector(i.replace("หฺ", "ห"))) for i in _pronunciate]
 
diff --git a/pythainlp/util/thaiwordcheck.py b/pythainlp/util/thaiwordcheck.py
index 0fe3f296b..0bc7a976d 100644
--- a/pythainlp/util/thaiwordcheck.py
+++ b/pythainlp/util/thaiwordcheck.py
@@ -3,13 +3,20 @@
 # SPDX-License-Identifier: Apache-2.0
 import warnings
 
+
 def is_native_thai(word: str) -> bool:
+    """Deprecated alias of :func:`pythainlp.morpheme.is_native_thai`."""
+    # stacklevel=2 attributes the warning to the caller; Python's default
+    # filters hide DeprecationWarning that is attributed to library code.
     warnings.warn(
         """
-        pythainlp.util.is_native_thai is rename as \
+        pythainlp.util.is_native_thai has been renamed to \
             pythainlp.morpheme.is_native_thai.
-        This function will remove in PyThaiNLP 5.1.
-        """, DeprecationWarning)
+        This function will be removed in PyThaiNLP 5.1.
+        """,
+        DeprecationWarning,
+        stacklevel=2,
+    )
     from pythainlp.morpheme import is_native_thai as check
 
     return check(word)
diff --git a/pythainlp/util/wordtonum.py b/pythainlp/util/wordtonum.py
index acb0b8217..d192a72f8 100644
--- a/pythainlp/util/wordtonum.py
+++ b/pythainlp/util/wordtonum.py
@@ -10,8 +10,8 @@
 import re
 from typing import List
 
-from pythainlp.tokenize import Tokenizer
 from pythainlp.corpus import thai_words
+from pythainlp.tokenize import Tokenizer
 
 _ptn_digits = r"(|หนึ่ง|เอ็ด|สอง|ยี่|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า)"
 _ptn_six_figures = (
diff --git a/pythainlp/wangchanberta/core.py b/pythainlp/wangchanberta/core.py
index afae29c40..a7d3c1a57 100644
--- a/pythainlp/wangchanberta/core.py
+++ b/pythainlp/wangchanberta/core.py
@@ -1,13 +1,15 @@
 # -*- coding: utf-8 -*-
 # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
 # SPDX-License-Identifier: Apache-2.0
-from typing import List, Tuple, Union
 import re
 import warnings
+from typing import List, Tuple, Union
+
 from transformers import (
     CamembertTokenizer,
     pipeline,
 )
+
 from pythainlp.tokenize import word_tokenize
 
 _model_name = "wangchanberta-base-att-spm-uncased"
@@ -140,8 +142,7 @@ def __init__(
              AI Research Institute of Thailand
         :param str model: The model that use wangchanberta pretrained.
         """
-        from transformers import AutoTokenizer
-        from transformers import AutoModelForTokenClassification
+        from transformers import AutoModelForTokenClassification, AutoTokenizer
 
         self.tokenizer = AutoTokenizer.from_pretrained(model)
         self.model = AutoModelForTokenClassification.from_pretrained(model)
diff --git a/pythainlp/word_vector/core.py b/pythainlp/word_vector/core.py
index 03932a735..1909995b1 100644
--- a/pythainlp/word_vector/core.py
+++ b/pythainlp/word_vector/core.py
@@ -6,6 +6,7 @@
 from gensim.models import KeyedVectors
 from gensim.models.keyedvectors import Word2VecKeyedVectors
 from numpy import ndarray, zeros
+
 from pythainlp.corpus import get_corpus_path
 from pythainlp.tokenize import THAI2FIT_TOKENIZER, word_tokenize
 
diff --git a/pythainlp/wsd/__init__.py b/pythainlp/wsd/__init__.py
index 357b4bc44..d066a8753 100644
--- a/pythainlp/wsd/__init__.py
+++ b/pythainlp/wsd/__init__.py
@@ -5,4 +5,5 @@
 Thai Word Sense Disambiguation (WSD)
 """
 __all__ = ["get_sense"]
+
 from pythainlp.wsd.core import get_sense
diff --git a/pythainlp/wsd/core.py b/pythainlp/wsd/core.py
index 751a8962f..0b64191ef 100644
--- a/pythainlp/wsd/core.py
+++ b/pythainlp/wsd/core.py
@@ -3,9 +3,9 @@
 # SPDX-License-Identifier: Apache-2.0
 from typing import List, Tuple, Union
 
+from pythainlp.corpus import thai_wsd_dict
 from pythainlp.tokenize import Tokenizer
 from pythainlp.util.trie import Trie
-from pythainlp.corpus import thai_wsd_dict
 
 _wsd_dict = thai_wsd_dict()
 _mean_all = {}
diff --git a/setup.py b/setup.py
index 4acda432f..07e786024 100644
--- a/setup.py
+++ b/setup.py
@@ -40,16 +40,42 @@
 requirements = [
     "backports.zoneinfo; python_version<'3.9'",
     "requests>=2.22.0",
-    "tzdata; sys_platform == 'win32'"
+    "tzdata; sys_platform == 'win32'",
 ]
 
 extras = {
+    "abbreviation": ["khamyo>=0.2.0"],
     "attacut": ["attacut>=1.0.6"],
     "benchmarks": ["PyYAML>=5.3.1", "numpy>=1.22", "pandas>=0.24"],
+    "coreference_resolution": [
+        "fastcoref>=2.1.5",
+        "spacy>=3.0",
+    ],
+    "dependency_parsing": [
+        "spacy_thai>=0.7.1",
+        "transformers>=4.22.1",
+        "ufal.chu-liu-edmonds>=1.0.2",
+    ],
+    "el": ["multiel>=0.5"],
+    "esupar": [
+        "esupar>=1.3.8",
+        "numpy",
+        "transformers>=4.22.1",
+    ],
+    "generate": ["fastai<2.0"],
     "icu": ["pyicu>=2.3"],
     "ipa": ["epitran>=1.1"],
     "ml": ["numpy>=1.22", "torch>=1.0.0"],
+    "mt5": ["sentencepiece>=0.1.91", "transformers>=4.6.0"],
+    "nlpo3": ["nlpo3>=1.2.2"],
+    "onnx": ["numpy>=1.22", "onnxruntime>=1.10.0", "sentencepiece>=0.1.91"],
+    "oskut": ["oskut>=1.3"],
+    "sefr_cut": ["sefr_cut>=1.1"],
+    "spacy_thai": ["spacy_thai>=0.7.1"],
+    "spell": ["phunspell>=0.1.6", "spylls>=0.1.5", "symspellpy>=6.7.6"],
     "ssg": ["ssg>=0.0.8"],
+    "textaugment": ["bpemb", "gensim>=4.0.0"],
+    "thai_nner": ["thai_nner"],
     "thai2fit": ["emoji>=0.5.1", "gensim>=4.0.0", "numpy>=1.22"],
     "thai2rom": ["numpy>=1.22", "torch>=1.0.0"],
     "translate": [
@@ -59,47 +85,24 @@
         "torch>=1.0.0",
         "transformers>=4.6.0",
     ],
-    "wunsen": ["wunsen>=0.0.1"],
-    "textaugment": ["bpemb", "gensim>=4.0.0"],
-    "wangchanberta": ["sentencepiece>=0.1.91", "transformers>=4.6.0"],
-    "mt5": ["sentencepiece>=0.1.91", "transformers>=4.6.0"],
-    "wtp": ["transformers>=4.6.0", "wtpsplit>=1.0.1"],
-    "wordnet": ["nltk>=3.3"],
-    "generate": ["fastai<2.0"],
-    "sefr_cut": ["sefr_cut>=1.1"],
-    "spell": ["phunspell>=0.1.6", "spylls>=0.1.5", "symspellpy>=6.7.6"],
-    "oskut": ["oskut>=1.3"],
-    "nlpo3": ["nlpo3>=1.2.2"],
-    "onnx": ["numpy>=1.22", "onnxruntime>=1.10.0", "sentencepiece>=0.1.91"],
-    "thai_nner": ["thai_nner"],
-    "esupar": [
-        "esupar>=1.3.8",
-        "numpy",
-        "transformers>=4.22.1",
-    ],
-    "spacy_thai": ["spacy_thai>=0.7.1"],
     "transformers_ud": [
         "transformers>=4.22.1",
         "ufal.chu-liu-edmonds>=1.0.2",
     ],
-    "dependency_parsing": [
-        "spacy_thai>=0.7.1",
-        "transformers>=4.22.1",
-        "ufal.chu-liu-edmonds>=1.0.2",
-    ],
-    "coreference_resolution": [
-        "fastcoref>=2.1.5",
-        "spacy>=3.0",
-    ],
-    "word_approximation": ["panphon>=0.20.0"],
+    "wangchanberta": ["sentencepiece>=0.1.91", "transformers>=4.6.0"],
     "wangchanglm": [
         "pandas>=0.24",
         "sentencepiece>=0.1.91",
         "transformers>=4.6.0",
     ],
+    "word_approximation": ["panphon>=0.20.0"],
+    "wordnet": ["nltk>=3.3"],
     "wsd": ["sentence-transformers>=2.2.2"],
-    "el": ["multiel>=0.5"],
-    "abbreviation": ["khamyo>=0.2.0"],
+    "wtp": ["transformers>=4.6.0", "wtpsplit>=1.0.1"],
+    "wunsen": ["wunsen>=0.0.1"],
+    # Compact dependencies
+    "compact": ["numpy>=1.22", "pyicu>=2.3", "python-crfsuite>=0.9.7"],
+    # Full dependencies
     "full": [
         "PyYAML>=5.3.1",
         "attacut>=1.0.4",
diff --git a/tests/__init__.py b/tests/__init__.py
index 95434038f..6066cd92f 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -6,12 +6,31 @@
 
 Each file in tests/ is for each main package.
 """
-import sys
-import unittest
 
-sys.path.append("../pythainlp")
+from unittest import TestLoader, TestSuite
 
-loader = unittest.TestLoader()
-testSuite = loader.discover("tests")
-testRunner = unittest.TextTestRunner(verbosity=1)
-testRunner.run(testSuite)
+test_packages: list[str] = [
+    "tests.test_ancient",
+    # "tests.test_cli",
+    # "tests.test_corpus",
+    "tests.test_soundex",
+    "tests.test_spell",
+    "tests.test_tokenize",
+    "tests.test_transliterate",
+    "tests.test_util",
+]
+
+
+def load_tests(
+    loader: TestLoader, standard_tests: TestSuite, pattern: str
+) -> TestSuite:
+    """Assemble the suite from ``test_packages`` (load_tests protocol)."""
+    # standard_tests and pattern are accepted for the protocol but unused.
+    collected = TestSuite()
+    for dotted_name in test_packages:
+        collected.addTests(loader.loadTestsFromName(dotted_name))
+    return collected
+
+if __name__ == "__main__":
+    import unittest
+    unittest.main()
diff --git a/tests/test_ancient.py b/tests/test_ancient.py
index ac3ef2da0..e165ca513 100644
--- a/tests/test_ancient.py
+++ b/tests/test_ancient.py
@@ -6,7 +6,7 @@
 from pythainlp.ancient import aksonhan_to_current
 
 
-class TestAncientPackage(unittest.TestCase):
+class AncientTestCase(unittest.TestCase):
     def test_aksonhan_to_current(self):
         self.assertEqual(aksonhan_to_current("ก"), "ก")
         self.assertEqual(aksonhan_to_current("กก"), "กก")
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
index 45e79617e..8973a097f 100644
--- a/tests/test_benchmarks.py
+++ b/tests/test_benchmarks.py
@@ -13,7 +13,7 @@
     TEST_DATA = yaml.safe_load(stream)
 
 
-class TestBenchmarksPackage(unittest.TestCase):
+class BenchmarksTestCase(unittest.TestCase):
     def test_preprocessing(self):
         self.assertIsNotNone(
             word_tokenization.preprocessing(
diff --git a/tests/test_classify.py b/tests/test_classify.py
index 723aad4dd..2e4128ed2 100644
--- a/tests/test_classify.py
+++ b/tests/test_classify.py
@@ -7,7 +7,7 @@
 from pythainlp.classify import GzipModel
 
 
-class TestClsPackage(unittest.TestCase):
+class ClsTestCase(unittest.TestCase):
     def test_GzipModel(self):
         training_data = [
             ("รายละเอียดตามนี้เลยค่าา ^^", "Neutral"),
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4f9258536..b7e3ca3c0 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -9,7 +9,7 @@
 from pythainlp import __main__, cli
 
 
-class TestMainPackage(unittest.TestCase):
+class CliTestCase(unittest.TestCase):
     def test_cli_main(self):
         # call with no argument, should exit with 2
         with self.assertRaises(SystemExit) as ex:
diff --git a/tests/test_corpus.py b/tests/test_corpus.py
index 031e7a883..224832207 100644
--- a/tests/test_corpus.py
+++ b/tests/test_corpus.py
@@ -5,8 +5,6 @@
 import os
 import unittest
 
-import nltk
-from nltk.corpus import wordnet as wn
 from requests import Response
 
 from pythainlp.corpus import (
@@ -35,12 +33,11 @@
     thai_words,
     tnc,
     ttc,
-    wordnet,
 )
 from pythainlp.corpus.util import revise_newmm_default_wordset
 
 
-class TestCorpusPackage(unittest.TestCase):
+class CorpusTestCase(unittest.TestCase):
     def test_conceptnet(self):
         self.assertIsNotNone(conceptnet.edges("รัก"))
 
@@ -133,13 +130,11 @@ def test_corpus(self):
         self.assertIsNotNone(download(name="test", version="0.0.10"))
         with self.assertRaises(Exception) as context:
             # Force re-downloading since the corpus already exists
-            self.assertIsNotNone(download(
-                name="test", version="0.0.11", force=True
-            ))
+            self.assertIsNotNone(
+                download(name="test", version="0.0.11", force=True)
+            )
         self.assertTrue(
-            "Hash does not match expected."
-            in
-            str(context.exception)
+            "Hash does not match expected." in str(context.exception)
         )
         self.assertIsNotNone(download(name="test", version="0.1"))
         self.assertIsNotNone(remove("test"))
@@ -159,38 +154,6 @@ def test_ttc(self):
         self.assertIsNotNone(ttc.word_freqs())
         self.assertIsNotNone(ttc.unigram_word_freqs())
 
-    def test_wordnet(self):
-        nltk.download('omw-1.4', force=True)  # load wordnet
-        self.assertIsNotNone(wordnet.langs())
-        self.assertIn("tha", wordnet.langs())
-
-        self.assertEqual(
-            wordnet.synset("spy.n.01").lemma_names("tha"), ["สปาย", "สายลับ"]
-        )
-        self.assertIsNotNone(wordnet.synsets("นก"))
-        self.assertIsNotNone(wordnet.all_synsets(pos=wn.ADJ))
-
-        self.assertIsNotNone(wordnet.lemmas("นก"))
-        self.assertIsNotNone(wordnet.all_lemma_names(pos=wn.ADV))
-        self.assertIsNotNone(wordnet.lemma("cat.n.01.cat"))
-
-        self.assertEqual(wordnet.morphy("dogs"), "dog")
-
-        bird = wordnet.synset("bird.n.01")
-        mouse = wordnet.synset("mouse.n.01")
-        self.assertEqual(
-            wordnet.path_similarity(bird, mouse), bird.path_similarity(mouse)
-        )
-        self.assertEqual(
-            wordnet.wup_similarity(bird, mouse), bird.wup_similarity(mouse)
-        )
-        self.assertEqual(
-            wordnet.lch_similarity(bird, mouse), bird.lch_similarity(mouse)
-        )
-
-        cat_key = wordnet.synsets("แมว")[0].lemmas()[0].key()
-        self.assertIsNotNone(wordnet.lemma_from_key(cat_key))
-
     def test_revise_wordset(self):
         training_data = [
             ["ถวิล อุดล", " ", "เป็น", "นักการเมือง", "หนึ่ง", "ใน"],
@@ -208,7 +171,6 @@ def test_zip(self):
 
     def test_find_synonyms(self):
         self.assertEqual(
-            find_synonyms("หมู"),
-            ['จรุก', 'วราหะ', 'วราห์', 'ศูกร', 'สุกร']
+            find_synonyms("หมู"), ["จรุก", "วราหะ", "วราห์", "ศูกร", "สุกร"]
         )
         self.assertEqual(find_synonyms("1"), [])
diff --git a/tests/test_el.py b/tests/test_el.py
index 007442f1c..4a029886a 100644
--- a/tests/test_el.py
+++ b/tests/test_el.py
@@ -7,7 +7,7 @@
 from pythainlp.el import EntityLinker
 
 
-class TestElPackage(unittest.TestCase):
+class ElTestCase(unittest.TestCase):
     def test_EntityLinker(self):
         with self.assertRaises(NotImplementedError):
             EntityLinker(model_name="cat")
diff --git a/tests/test_khavee.py b/tests/test_khavee.py
index 6b43e4da9..803cc8f38 100644
--- a/tests/test_khavee.py
+++ b/tests/test_khavee.py
@@ -9,7 +9,7 @@
 kv = KhaveeVerifier()
 
 
-class TestKhaveePackage(unittest.TestCase):
+class KhaveeTestCase(unittest.TestCase):
     def test_check_sara(self):
         self.assertEqual(kv.check_sara("เริง"), "เออ")
 
diff --git a/tests/test_morpheme.py b/tests/test_morpheme.py
index 35cdb4bc3..00e472db8 100644
--- a/tests/test_morpheme.py
+++ b/tests/test_morpheme.py
@@ -7,7 +7,7 @@
 from pythainlp.morpheme import is_native_thai, nighit
 
 
-class TestMorphemePackage(unittest.TestCase):
+class MorphemeTestCase(unittest.TestCase):
     def test_nighit(self):
         self.assertEqual(nighit("สํ", "คีต"), "สังคีต")
         self.assertEqual(nighit("สํ", "จร"), "สัญจร")
diff --git a/tests/test_parse.py b/tests/test_parse.py
index b5ed5b85a..a93fa4cfa 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -7,7 +7,7 @@
 from pythainlp.parse import dependency_parsing
 
 
-class TestParsePackage(unittest.TestCase):
+class ParseTestCase(unittest.TestCase):
     def test_dependency_parsing(self):
         self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="esupar"))
         self.assertIsNotNone(
diff --git a/tests/test_soundex.py b/tests/test_soundex.py
index cbd8cbe53..505b7e4ad 100644
--- a/tests/test_soundex.py
+++ b/tests/test_soundex.py
@@ -11,10 +11,9 @@
     soundex,
     udom83,
 )
-from pythainlp.soundex.sound import audio_vector, word_approximation
 
 
-class TestSoundexPackage(unittest.TestCase):
+class SoundexTestCase(unittest.TestCase):
     def test_soundex(self):
         self.assertIsNotNone(soundex("a", engine="lk82"))
         self.assertIsNotNone(soundex("a", engine="udom83"))
@@ -82,9 +81,3 @@ def test_soundex(self):
         self.assertIsNotNone(prayut_and_somchaip("ณาญ"))
         self.assertIsNotNone(prayut_and_somchaip("กาง"))
         self.assertIsNotNone(prayut_and_somchaip("ว้าว"))
-
-    def test_word_approximation(self):
-        self.assertIsNotNone(word_approximation("รถ", ["รส", "รด", "คน"]))
-
-    def test_audio_vector(self):
-        self.assertIsNotNone(audio_vector("คน"))
diff --git a/tests/test_spell.py b/tests/test_spell.py
index 094bbf45c..39a69416b 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -10,11 +10,12 @@
     correct_sent,
     spell,
     spell_sent,
-    symspellpy,
 )
 
+SENT_TOKS = ["เด็", "อินอร์เน็ต", "แรง"]
 
-class TestSpellPackage(unittest.TestCase):
+
+class SpellTestCase(unittest.TestCase):
     def test_spell(self):
         self.assertEqual(spell(None), [""])
         self.assertEqual(spell(""), [""])
@@ -27,30 +28,6 @@ def test_spell(self):
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
 
-        result = spell("เน้ร", engine="phunspell")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
-        result = spell("เกสมร์", engine="phunspell")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
-        result = spell("เน้ร", engine="symspellpy")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
-        result = spell("เกสมร์", engine="symspellpy")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
-        result = spell("เน้ร", engine="tltk")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
-        result = spell("เดก", engine="tltk")
-        self.assertIsInstance(result, list)
-        self.assertGreater(len(result), 0)
-
     def test_word_correct(self):
         self.assertEqual(correct(None), "")
         self.assertEqual(correct(""), "")
@@ -63,18 +40,6 @@ def test_word_correct(self):
         self.assertIsInstance(result, str)
         self.assertNotEqual(result, "")
 
-        result = correct("ทดสอง", engine="phunspell")
-        self.assertIsInstance(result, str)
-        self.assertNotEqual(result, "")
-
-        result = correct("ทดสอง", engine="symspellpy")
-        self.assertIsInstance(result, str)
-        self.assertNotEqual(result, "")
-
-        result = correct("ทดสอง", engine="wanchanberta_thai_grammarly")
-        self.assertIsInstance(result, str)
-        self.assertNotEqual(result, "")
-
     def test_norvig_spell_checker(self):
         checker = NorvigSpellChecker(dict_filter=None)
         self.assertTrue(len(checker.dictionary()) > 0)
@@ -124,21 +89,9 @@ def test_norvig_spell_checker(self):
             checker = NorvigSpellChecker(custom_dict=user_dict)
 
     def test_spell_sent(self):
-        self.spell_sent = ["เด็", "อินอร์เน็ต", "แรง"]
-        self.assertIsNotNone(spell_sent(self.spell_sent))
-        self.assertIsNotNone(spell_sent(self.spell_sent, engine="pn"))
-        self.assertIsNotNone(spell_sent(self.spell_sent, engine="phunspell"))
-        self.assertIsNotNone(spell_sent(self.spell_sent, engine="symspellpy"))
+        self.assertIsNotNone(spell_sent(SENT_TOKS))
+        self.assertIsNotNone(spell_sent(SENT_TOKS, engine="pn"))
 
     def test_correct_sent(self):
-        self.spell_sent = ["เด็", "อินอร์เน็ต", "แรง"]
-        self.assertIsNotNone(correct_sent(self.spell_sent))
-        self.assertIsNotNone(correct_sent(self.spell_sent, engine="pn"))
-        self.assertIsNotNone(correct_sent(self.spell_sent, engine="phunspell"))
-        self.assertIsNotNone(
-            correct_sent(self.spell_sent, engine="symspellpy")
-        )
-        self.assertIsNotNone(
-            correct_sent(self.spell_sent, engine="wanchanberta_thai_grammarly")
-        )
-        self.assertIsNotNone(symspellpy.correct_sent(self.spell_sent))
+        self.assertIsNotNone(correct_sent(SENT_TOKS))
+        self.assertIsNotNone(correct_sent(SENT_TOKS, engine="pn"))
diff --git a/tests/test_summarize.py b/tests/test_summarize.py
deleted file mode 100644
index 114a19d3f..000000000
--- a/tests/test_summarize.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# -*- coding: utf-8 -*-
-# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
-# SPDX-License-Identifier: Apache-2.0
-
-import unittest
-
-from pythainlp.summarize import extract_keywords, summarize
-
-
-class TestSummarizePackage(unittest.TestCase):
-    def test_summarize(self):
-        text = (
-            "อาหาร หมายถึง ของแข็งหรือของเหลว "
-            "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว "
-            "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย "
-            "ทำให้ร่างกายเจริญเติบโต "
-            "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย "
-            "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ "
-            "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"
-        )
-        self.assertEqual(
-            summarize(text=text, n=1),
-            ["อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"],
-        )
-        # self.assertIsNotNone(summarize(text, engine="mt5-small"))
-        # self.assertIsNotNone(summarize([]))
-        # self.assertIsNotNone(summarize(text, 1, engine="mt5-small"))
-        self.assertIsNotNone(
-            summarize(text, 1, engine="mt5-cpe-kmutt-thai-sentence-sum")
-        )
-        self.assertIsNotNone(summarize(text, 1, engine="XX"))
-        with self.assertRaises(ValueError):
-            self.assertIsNotNone(summarize(text, 1, engine="mt5-cat"))
-
-    def test_keyword_extraction(self):
-        text = (
-            "อาหาร หมายถึง ของแข็งหรือของเหลว "
-            "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว "
-            "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย "
-            "ทำให้ร่างกายเจริญเติบโต "
-            "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย "
-            "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ "
-            "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"
-        )
-        self.assertEqual(extract_keywords(""), [])
-        self.assertEqual(extract_keywords("   "), [])
-
-        # test default engine, common case
-        keywords = extract_keywords(text)
-        expected = ["ซ่อมแซมส่วน", "เจริญเติบโต", "อวัยวะต่างๆ", "ควบคุมการเปลี่ยนแปลง"]
-        for exp_kw in expected:
-            self.assertIn(exp_kw, keywords)
-
-        # test another engine
-        for max_kw in (5, 10):
-            keywords = extract_keywords(
-                text, engine="frequency", max_keywords=max_kw
-            )
-            self.assertEqual(len(keywords), max_kw)
-
-        # test invalid engine
-        with self.assertRaises(ValueError):
-            extract_keywords(text, engine="random engine")
-
-        # test different tokenizer
-        keywords = extract_keywords(text, tokenizer="attacut")
-
-        expected = ["อวัยวะต่างๆ", "ซ่อมแซมส่วน", "เจริญเติบโต", "เกิดพลังงาน"]
-        for exp_kw in expected:
-            self.assertIn(exp_kw, keywords)
-
-        # test overriding stop words
-        stpw = "เจริญเติบโต"
-        keywords = extract_keywords(text, stop_words=[stpw])
-        self.assertNotIn(stpw, keywords)
-
-    def test_keybert(self):
-        text = (
-            "อาหาร หมายถึง ของแข็งหรือของเหลว "
-            "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว "
-            "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย "
-            "ทำให้ร่างกายเจริญเติบโต "
-            "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย "
-            "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ "
-            "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"
-        )
-
-        from pythainlp.summarize.keybert import KeyBERT
-        from pythainlp.tokenize import word_tokenize
-
-        keybert = KeyBERT()
-        # test ngram range
-        ng_ranges = [(1, 1), (1, 2), (2, 2), (3, 3)]
-        for ng_min, ng_max in ng_ranges:
-            keywords = keybert.extract_keywords(
-                text, keyphrase_ngram_range=(ng_min, ng_max)
-            )
-
-            for kw in keywords:
-                self.assertTrue(ng_min <= len(word_tokenize(kw)) <= ng_max)
-
-        # test max_keywords
-        max_kws = 10
-        keywords = keybert.extract_keywords(text, max_keywords=max_kws)
-        self.assertLessEqual(len(keywords), max_kws)
-
-        text_short = "เฮลโหล"
-        keywords = keybert.extract_keywords(text_short, max_keywords=max_kws)
-        self.assertLessEqual(len(keywords), max_kws)
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 5a12c45ac..dc0d436f6 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -21,7 +21,7 @@
 from pythainlp.tag.thainer import ThaiNameTagger
 
 
-class TestTagPackage(unittest.TestCase):
+class TagTestCase(unittest.TestCase):
     # ### pythainlp.tag.PerceptronTagger
 
     def test_chunk_parse(self):
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 0d6026168..14df6b394 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -7,198 +7,204 @@
 from pythainlp.tokenize import (
     DEFAULT_WORD_DICT_TRIE,
     Tokenizer,
-    attacut,
-    deepcut,
     etcc,
     longest,
     multi_cut,
-    nercut,
     newmm,
-    oskut,
-    paragraph_tokenize,
-    pyicu,
-    sefr_cut,
     sent_tokenize,
-    ssg,
     subword_tokenize,
     syllable_tokenize,
     tcc,
     tcc_p,
-    tltk,
     word_detokenize,
     word_tokenize,
 )
-from pythainlp.tokenize import clause_tokenize as sent_clause_tokenize
 from pythainlp.util import dict_trie
 
+TEXT_1 = "หมอนทองตากลมหูว์MBK39 :.ฉฺ๐๐๓-#™±"
+TEXT_2 = "ทดสอบ"
+
+LONG_TEXT = (
+    "ไต้หวัน (แป่ะเอ๋ยี้: Tâi-oân; ไต่อวัน) หรือ ไถวาน "
+    "(อักษรโรมัน: Taiwan; จีนตัวย่อ: 台湾; จีนตัวเต็ม: 臺灣/台灣; พินอิน: "
+    "Táiwān; ไถวาน) หรือชื่อทางการว่า สาธารณรัฐจีน (จีนตัวย่อ: 中华民国; "
+    "จีนตัวเต็ม: 中華民國; พินอิน: Zhōnghuá "
+    "Mínguó) เป็นรัฐในทวีปเอเชียตะวันออก[7][8][9] ปัจจุบันประกอบด้วย"
+    "เกาะใหญ่ 5 แห่ง คือ จินเหมิน (金門), ไต้หวัน, เผิงหู (澎湖), หมาจู่ "
+    "(馬祖), และอูชิว (烏坵) กับทั้งเกาะเล็กเกาะน้อยอีกจำนวนหนึ่ง "
+    'ท้องที่ดังกล่าวเรียกรวมกันว่า "พื้นที่ไต้หวัน" (臺灣地區)\n'
+    "ไต้หวันด้านตะวันตกติดกับจีนแผ่นดินใหญ่ ด้านตะวันออกและตะวันออก"
+    "เฉียงเหนือติดกับญี่ปุ่น และด้านใต้ติดกับฟิลิปปินส์ กรุงไทเปเป็น"
+    "เมืองหลวง ส่วนไทเปใหม่เป็นเขตปกครองที่จัดตั้งขึ้นใหม่ กินพื้นที่"
+    "กรุงไทเปและเป็นเขตซึ่งประชากรหนาแน่นที่สุดในเวลานี้\n"
+    "เกาะไต้หวันเดิมเป็นที่อยู่ของชนพื้นเมือง และมีชาวจีนจากแผ่นดิน"
+    "ใหญ่เข้ามาอาศัยร่วมด้วย จนกระทั่งชาววิลันดาและสเปนเดินทางเข้า"
+    "มาในยุคสำรวจเมื่อศตวรรษที่ 17 และมาตั้งบ้านเรือนกลายเป็นนิคม"
+    "ใหญ่โต ต่อมาปี 1662 ราชวงศ์หมิงในแผ่นดินใหญ่ถูกราชวงศ์ชิงแทนที่ "
+    "เจิ้ง เฉิงกง (鄭成功) ขุนศึกหมิง รวมกำลังหนีมาถึงเกาะไต้หวัน "
+    "และรุกไล่ฝรั่งออกไปได้อย่างราบคาบ เขาจึงตั้งราชอาณาจักรตงหนิง "
+    '(東寧) ขึ้นบนเกาะเพื่อ "โค่นชิงฟื้นหมิง" แต่ในปี 1683 ราชวงศ์'
+    "ชิงปราบปรามอาณาจักรตงหนิงและเข้าครอบครองไต้หวันเป็นผลสำเร็จ "
+    "ไต้หวันจึงกลายเป็นมณฑลหนึ่งของจีน อย่างไรก็ดี ความบาดหมางระหว่าง"
+    "จีนกับญี่ปุ่นเป็นเหตุให้ญี่ปุ่นได้ไต้หวันไปในปี 1895\n"
+    "ก่อนเสียไต้หวันคืนแก่จีนหลังสงครามโลกครั้งที่สอง ช่วงนั้น มีการ"
+    "เปลี่ยนแปลงการปกครองในจีน พรรคก๊กมินตั๋ง ได้เป็นใหญ่ "
+    "แต่ไม่นานก็เสียทีให้แก่พรรคคอมมิวนิสต์จีน พรรคก๊กมินตั๋งจึงหนี"
+    "มายังเกาะไต้หวันและสถาปนาสาธารณรัฐจีนขึ้นบนเกาะแยกต่างหาก "
+    "ส่วนฝ่ายคอมมิวนิสต์จีนที่เป็นฝ่ายได้รับชัยชนะได้สถาปนาสาธารณรัฐ"
+    "ประชาชนจีนบนแผ่นดินใหญ่ อย่างไรก็ดี จีนยังคงถือว่า ไต้หวันเป็น"
+    "มณฑลหนึ่งของตน และไต้หวันเองก็ยังมิได้รับการยอมรับจากนานาชาติ"
+    "ว่าเป็นประเทศเอกราชมาจนบัดนี้\n"
+    "ในช่วงทศวรรษ 1980 ถึงต้นทศวรรษ 1990 การเมืองการปกครอง"
+    "สาธารณรัฐจีน (ไต้หวัน) เจริญรุ่งเรืองจนเป็นประชาธิปไตยที่มีพรรค"
+    "การเมืองหลายพรรคและมีการเลือกตั้งทั่วหน้า ในช่วงกลางศตวรรษที่ "
+    "20 เศรษฐกิจไต้หวันงอกงามอย่างรวดเร็ว ไต้หวันจึงกลายเป็นประเทศ"
+    "พัฒนาแล้ว ได้ชื่อว่าเป็นหนึ่งในสี่เสือแห่งเอเชีย มีอุตสาหกรรม"
+    "ล้ำหน้า และมีเศรษฐกิจใหญ่โตเป็นอันดับที่ 19 ของโลก[11][12] "
+    "อุตสาหกรรมที่ใช้เทคโนโลยีชั้นสูงของไต้หวันยังมีบทบาทสำคัญมากใน"
+    "เศรษฐกิจโลก เป็นเหตุให้ไต้หวันได้เป็นสมาชิกองค์การการค้าโลกและ"
+    "ความร่วมมือทางเศรษฐกิจเอเชีย-แปซิฟิก เสรีภาพของสื่อมวลชน เสรี"
+    "ภาพทางเศรษฐกิจ การสาธารณสุข[13]การศึกษา และดัชนีการพัฒนามนุษย์ใน"
+    "ไต้หวันยังได้รับการจัดอยู่ในอันดับสูงด้วย[14][4][15]\n"
+    "สาธารณรัฐจีน มีลักษณะเป็นกลุ่มเกาะ ภูมิประเทศติดกับทะเล ไม่ติด"
+    "กับประเทศใดเลย ห่างจากเกาะทางทิศเหนือและทิศตะวันตกเป็นสาธารณรัฐ"
+    "ประชาชนจีน ทิศใต้เป็นประเทศฟิลิปปินส์และทะเลจีนใต้ ส่วนทิศ"
+    "ตะวันออกเป็นมหาสมุทรแปซิฟิก\n"
+    "ในปี ค.ศ. 1638 หลังการพ่ายแพ้ของหลานชายของเจิ้ง เฉิงกง "
+    "จากการบุกโจมตีทางทัพเรือของราชวงศ์ชิงแมนจูที่นำทัพโดยชื่อ หลาง"
+    "จากทางใต้ของมณฑลฝูเจี้ยน ทำให้ราชวงศ์ชิงผนวกยึดเกาะไต้หวันเป็น"
+    "ส่วนหนึ่งสำเร็จ และวางไว้ภายใต้เขตอำนาจของมณฑลฝูเจี้ยน ราชสำนัก"
+    "ราชวงศ์ชิงพยายามลดการละเมิดสิทธิ์และความไม่ลงรอยกันในพื้นที่โดย"
+    "ออกกฎหมายเพื่อจัดการตรวจคนเข้าเมืองและเคารพสิทธิในที่ดินของชน"
+    "พื้นเมืองไต้หวัน ผู้อพยพจากฝูเจี้ยนทางใต้ส่วนใหญ่ยังคงเดินทางไป"
+    "ไต้หวัน เขตแดนระหว่างดินแดนที่เสียภาษีและสิ่งที่ถูกพิจารณาว่า"
+    'เป็นดินแดน "เขตอันตราย" เปลี่ยนไปทางทิศตะวันออกโดยชาวพื้นเมือง'
+    "บางคนเข้ารีตรับวัฒนธรรมแบบจีน ในขณะที่คนอื่นถอยกลับเข้าในภูเขา "
+    "ในช่วงเวลานี้มีความขัดแย้งจำนวนมากระหว่างกลุ่มชาวฮั่นด้วยกันเอง"
+    "จากภูมิภาคต่าง ๆ ของฝูเจี้ยนทางใต้โดยเฉพาะอย่างยิ่งระหว่างเฉวียน"
+    "โจวกับฉางโจว และระหว่างฝูเจี้ยนตอนใต้และชาวพื้นเมืองไต้หวัน\n"
+    "พ.ศ. 2454 (ค.ศ. 1911) การจลาจลอู่ฮั่นในประเทศจีน เป็นจุดเริ่มต้น"
+    "การล่มสลายของราชวงศ์ชิง เมื่อพรรคคอมมิวนิสต์จีนเข้ามีอำนาจในจีน"
+    "แผ่นดินใหญ่เมื่อ พ.ศ. 2492 (1949) พรรคก๊กมินตั๋ง พรรคการเมือง"
+    "ชาตินิยมของจีนที่เป็นฝ่ายแพ้ก็พาผู้คนอพยพหนีออกจากแผ่นดินใหญ่มา"
+    "ตั้งหลักที่ไต้หวัน เพื่อวางแผนกลับไปครองอำนาจในจีนต่อไป\n"
+    "ชาวจีนมากกว่า 1 ล้าน 5 แสนคน อพยพตามมาอยู่ที่เกาะไต้หวันในยุคที่"
+    "เหมา เจ๋อตง มีอำนาจเต็มที่ในจีนแผ่นดินใหญ่ ผู้นำของประเทศทั้งสอง"
+    "จีนคือผู้นำพรรคคอมมิวนิสต์กับผู้นำสาธารณรัฐจีนบนเกาะไต้หวัน แย่ง"
+    "กันเป็นกระบอกเสียงของประชาชนจีนในเวทีโลก แต่เสียงของนานาประเทศ"
+    "ส่วนใหญ่เกรงอิทธิพลของจีนแผ่นดินใหญ่ จึงให้การยอมรับจีนแผ่นดิน"
+    "ใหญ่มากกว่า\n"
+    "ในปี พ.ศ. 2514 (ค.ศ. 1971) ก่อนที่นายพล เจียง ไคเช็ก"
+    "(ภาษาจีน: 蔣中正) จะถึงอสัญกรรมไม่กี่ปี สาธารณรัฐจีนซึ่งเป็น"
+    "ประเทศที่ร่วมก่อตั้งองค์การสหประชาชาติได้สูญเสียสมาชิกภาพใน"
+    "ฐานะตัวแทนชาวจีนให้กับสาธารณรัฐประชาชนจีน ในปี พ.ศ. 2521 (1978)"
+    "สหประชาชาติประกาศรับรองจีนเดียวคือจีนแผ่นดินใหญ่และตัดสัมพันธ์"
+    "ทางการเมืองกับสาธารณรัฐจีน ทั้งสหรัฐอเมริกาก็ได้ถอนการรับรองว่า"
+    "สาธารณรัฐจีนมีฐานะเป็นรัฐ ไต้หวันจึงกลายเป็นเพียงดินแดนที่จีน"
+    "อ้างว่าเป็นส่วนหนึ่งของสาธารณรัฐประชาชนจีนตั้งแต่นั้นเป็นต้นมา\n"
+    "เมื่อเจียง ไคเช็ก ถึงแก่อสัญกรรมในปี พ.ศ. 2518 (1975) ลูกชาย"
+    "ที่ชื่อ เจี่ยง จิงกั๋ว ได้เป็นผู้สืบทอดการปกครอง"
+    "ไต้หวันต่อและเริ่มกระบวนการ วางรากฐานไปสู่ประชาธิปไตย\n"
+    "หลังจากที่ประธานาธิบดี เจียง จิงกั๋ว เสียชีวิต ไต้หวันจึงได้เข้า"
+    "สู่ระบอบประชาธิปไตยเต็มรูปแบบ ประธานาธิบดีคนใหม่ ซึ่งเกิดใน"
+    "ไต้หวัน ชื่อ หลี่ เติงฮุย ขึ้นบริหารประเทศ โดยการสนับสนุนของ"
+    "เจี่ยง จิงกั๋ว ทั้งที่ หลี่ เติงฮุย นั้นเคลื่อนไหว"
+    "สนับสนุนเอกราชไต้หวัน นาย รัฐบาลจีนที่ปักกิ่งได้ตั้ง"
+    'ฉายาประธานาธิบดีไต้หวันคนใหม่ว่า "จิ้งจกปากหวาน" '
+    "ช่วงเวลาที่นายหลี่ เติงฮุย เป็นประธานาธิบดี การเมืองของไต้หวัน"
+    "เกิดการแตกแยกออกเป็น 3 ฝ่ายคือ 1) พวกก๊กมินตั๋ง ที่ต้องการกลับ"
+    "ไปรวมประเทศกับจีนแผ่นดินใหญ่ (รวมจีนแผ่นดินใหญ่ภายใต้การปกครอง"
+    "ของสาธารณรัฐจีน) 2) พวกที่ต้องการให้ไต้หวันเป็นประเทศอิสระไม่"
+    "เกี่ยวข้องกับจีนแผ่นดินใหญ่ และ 3) พวกที่ต้องการดำรงฐานะของ"
+    "ประเทศไว้ดังเดิมต่อไป\n"
+    "ไต้หวันกับจีนแผ่นดินใหญ่นัดเจรจาหาทางออกของข้อขัดแย้งทางการเมือง"
+    "ครั้งแรกที่สิงคโปร์เมื่อปี พ.ศ. 2536 (ค.ศ. 1993) แต่ปรากฏว่าจีน"
+    "แผ่นดินใหญ่ประวิงเวลาลงนามในสัญญาหลายฉบับที่เป็นข้อตกลงร่วมกัน "
+    "ทำให้ผลการเจรจาคราวนั้นไม่ก้าวหน้าไปถึงไหน ความสัมพันธ์ระหว่าง"
+    "สองจีนเลวร้ายลงทุกที เมื่อประธานาธิบดี หลี่ เติงฮุย เดินทางไป"
+    "เยือนสหรัฐอเมริกาและได้รับการยอมรับอย่างเอิกเกริก ทำให้จีนแผ่น"
+    "ดินใหญ่ไม่พอใจอย่างมาก จึงข่มขวัญไต้หวันกับประเทศที่ให้การสนับ"
+    "สนุนไต้หวัน ด้วยการทำการซ้อมรบขึ้นใกล้ ๆ เกาะไต้หวัน สหรัฐ"
+    "อเมริกาออกมาแสดงอาการปกป้องคุ้มครองไต้หวันด้วยการส่งกำลังกอง"
+    "เรือรบของสหรัฐฯ มาป้วนเปี้ยนอยู่ในน่านน้ำที่จีนซ้อมรบ\n"
+    "ขณะที่โลกกำลังล่อแหลมกับสถานการณ์ที่ตึงเครียดในน่านน้ำจีนมาก"
+    "ขึ้นทุกทีนั้น ไต้หวันก็จัดให้มีการเลือกตั้งครั้งใหม่ และในการ"
+    "เลือกตั้งครั้งใหม่นั้นเอง ไต้หวันก็ได้นายหลี่ เติงฮุย เป็น"
+    "ประธานาธิบดีอีกครั้ง\n"
+    "ไต้หวันเข้าสู่สภาวะวิกฤต เมื่อเกิดแผ่นดินไหวครั้งร้ายแรงที่สุดใน"
+    "ประวัติศาสตร์ในเดือนกันยายน พ.ศ. 2542 (ค.ศ. 1999) ทำให้ประชากร"
+    "ส่วนมากที่เป็นชาวพื้นเมืองเสียชีวิตไป 2,000 คน ทั้งเมืองมีแต่"
+    "เศษซากปรักหักพังจากภัยธรรมชาติ และช่วงนี้ไต้หวันต้องเผชิญความ"
+    "ยากลำบาก จีนแผ่นดินใหญ่ก็เพิ่มความกดดันไม่ให้นานาชาติ"
+    "เข้ามายุ่งเกี่ยวกับไต้หวันแม้ในยามคับขันเช่นนี้ โดยประกาศว่า "
+    "หากมีประเทศใดจะเข้าไปให้ความช่วยเหลือไต้หวัน จะต้องได้รับอนุญาต"
+    "จากจีนก่อน ซึ่งคำประกาศของจีนแผ่นดินใหญ่สวนทางกับเมตตาธรรมของ"
+    "ประเทศทั่วโลกที่ต้องการให้ความช่วยเหลือไต้หวัน\n"
+    "เดือนมีนาคม พ.ศ. 2543 (ค.ศ. 2000) มีการเลือกตั้งใหม่ในไต้หวัน "
+    "ชาวไต้หวันเลือกผู้แทนจากพรรคประชาธิปไตยก้าวหน้า คือ นายเฉิน สุย"
+    "เปี่ยน เป็นประธานาธิบดีคนใหม่ของไต้หวัน ผู้ประกาศนโยบายการเมือง"
+    "แข็งกร้าวว่าไต้หวันต้องการแยกตัวเป็นอิสระจากจีนแผ่นดินใหญ่ ยุติ"
+    "ยุคของพรรคชาตินิยมที่ยังฝักใฝ่แผ่นดินใหญ่อยู่ จีนแผ่นดินใหญ่จึง"
+    "ถือว่าเป็นกบฏต่อการปกครองของจีน เพราะแต่ไหนแต่ไร ไต้หวันไม่เคย"
+    "ประกาศอย่างเป็นทางการว่าเป็นประเทศอิสระแยกจากจีน และจีนพูดอยู่"
+    "เสมอว่าไต้หวันเป็นเด็กในปกครองที่ค่อนข้างจะหัวดื้อและเกเร หาก"
+    "ไต้หวันประกาศว่าเป็นอิสระจากจีนเมื่อใด จีนก็จะยกกำลังจัดการ"
+    "กับไต้หวันทันที\n"
+    "ในขณะที่ความสัมพันธ์ทางการเมืองระหว่างสองจีนในสายตาชาวโลก"
+    "เลวร้ายลง จีนทั้งสองกลับมีการติดต่อทางการค้ากันมากขึ้น มีการ"
+    "ผ่อนปรนอนุญาตให้ชาวไต้หวันเดินทางไปจีนแผ่นดินใหญ่เพื่อเยี่ยม"
+    "ญาติได้ เกิดปรากฏการณ์สำคัญคือนักธุรกิจไต้หวันหอบเงินทุนกว่า "
+    "20,000 ล้านดอลลาร์สหรัฐ ไปลงทุนดำเนินธุรกิจทางตอนใต้ของจีน"
+    "แผ่นดินใหญ่ จนกระทั่งขณะนี้ชาวไต้หวันกลายเป็นนักลงทุนรายใหญ่"
+    "เป็นลำดับ 2 ของจีน\n"
+    "วันที่ 24 พฤษภาคม 2560 ศาลรัฐธรรมนูญวินิจฉัยว่ากฎหมายสมรส"
+    "ปัจจุบันในเวลานั้น ละเมิดรัฐธรรมนูญ โดยปฏิเสธสิทธิสมรสของคู่รัก"
+    "เพศเดียวกันชาวไต้หวัน ศาลวินิจฉัยว่าหากสภานิติบัญญัติไม่ผ่าน"
+    "การแก้ไขกฎหมายที่เพียงพอต่อกฎหมายสมรสของไต้หวันภายในสองปี "
+    "การสมรสเพศเดียวกันจะชอบด้วยกฎหมายโดยอัตโนมัติในไต้หวัน[17] "
+    "วันที่ 17 พฤษภาคม 2562 สภานิติบัญญัติไต้หวันอนุมัติ"
+    "ร่างกฎหมายทำให้การสมรสเพศเดียวกันชอบด้วยกฎหมาย"
+    " ทำให้เป็นประเทศแรกในทวีปเอเชียที่ผ่านกฎหมายดังกล่าว[18][19]"
+)
 
-class TestTokenizePackage(unittest.TestCase):
-    def setUp(self):
-        self.text_1 = "หมอนทองตากลมหูว์MBK39 :.ฉฺ๐๐๓-#™±"
-        self.text_2 = "ทดสอบ"
+DANGER_TEXT_1 = (
+    "ชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิ"
+    "ชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิ"
+    "ชิชิชิชิชิชิชิชิชิ"
+)
 
-        self.long_text = (
-            "ไต้หวัน (แป่ะเอ๋ยี้: Tâi-oân; ไต่อวัน) หรือ ไถวาน "
-            "(อักษรโรมัน: Taiwan; จีนตัวย่อ: 台湾; จีนตัวเต็ม: 臺灣/台灣; พินอิน: "
-            "Táiwān; ไถวาน) หรือชื่อทางการว่า สาธารณรัฐจีน (จีนตัวย่อ: 中华民国; "
-            "จีนตัวเต็ม: 中華民國; พินอิน: Zhōnghuá "
-            "Mínguó) เป็นรัฐในทวีปเอเชียตะวันออก[7][8][9] ปัจจุบันประกอบด้วย"
-            "เกาะใหญ่ 5 แห่ง คือ จินเหมิน (金門), ไต้หวัน, เผิงหู (澎湖), หมาจู่ "
-            "(馬祖), และอูชิว (烏坵) กับทั้งเกาะเล็กเกาะน้อยอีกจำนวนหนึ่ง "
-            'ท้องที่ดังกล่าวเรียกรวมกันว่า "พื้นที่ไต้หวัน" (臺灣地區)\n'
-            "ไต้หวันด้านตะวันตกติดกับจีนแผ่นดินใหญ่ ด้านตะวันออกและตะวันออก"
-            "เฉียงเหนือติดกับญี่ปุ่น และด้านใต้ติดกับฟิลิปปินส์ กรุงไทเปเป็น"
-            "เมืองหลวง ส่วนไทเปใหม่เป็นเขตปกครองที่จัดตั้งขึ้นใหม่ กินพื้นที่"
-            "กรุงไทเปและเป็นเขตซึ่งประชากรหนาแน่นที่สุดในเวลานี้\n"
-            "เกาะไต้หวันเดิมเป็นที่อยู่ของชนพื้นเมือง และมีชาวจีนจากแผ่นดิน"
-            "ใหญ่เข้ามาอาศัยร่วมด้วย จนกระทั่งชาววิลันดาและสเปนเดินทางเข้า"
-            "มาในยุคสำรวจเมื่อศตวรรษที่ 17 และมาตั้งบ้านเรือนกลายเป็นนิคม"
-            "ใหญ่โต ต่อมาปี 1662 ราชวงศ์หมิงในแผ่นดินใหญ่ถูกราชวงศ์ชิงแทนที่ "
-            "เจิ้ง เฉิงกง (鄭成功) ขุนศึกหมิง รวมกำลังหนีมาถึงเกาะไต้หวัน "
-            "และรุกไล่ฝรั่งออกไปได้อย่างราบคาบ เขาจึงตั้งราชอาณาจักรตงหนิง "
-            '(東寧) ขึ้นบนเกาะเพื่อ "โค่นชิงฟื้นหมิง" แต่ในปี 1683 ราชวงศ์'
-            "ชิงปราบปรามอาณาจักรตงหนิงและเข้าครอบครองไต้หวันเป็นผลสำเร็จ "
-            "ไต้หวันจึงกลายเป็นมณฑลหนึ่งของจีน อย่างไรก็ดี ความบาดหมางระหว่าง"
-            "จีนกับญี่ปุ่นเป็นเหตุให้ญี่ปุ่นได้ไต้หวันไปในปี 1895\n"
-            "ก่อนเสียไต้หวันคืนแก่จีนหลังสงครามโลกครั้งที่สอง ช่วงนั้น มีการ"
-            "เปลี่ยนแปลงการปกครองในจีน พรรคก๊กมินตั๋ง ได้เป็นใหญ่ "
-            "แต่ไม่นานก็เสียทีให้แก่พรรคคอมมิวนิสต์จีน พรรคก๊กมินตั๋งจึงหนี"
-            "มายังเกาะไต้หวันและสถาปนาสาธารณรัฐจีนขึ้นบนเกาะแยกต่างหาก "
-            "ส่วนฝ่ายคอมมิวนิสต์จีนที่เป็นฝ่ายได้รับชัยชนะได้สถาปนาสาธารณรัฐ"
-            "ประชาชนจีนบนแผ่นดินใหญ่ อย่างไรก็ดี จีนยังคงถือว่า ไต้หวันเป็น"
-            "มณฑลหนึ่งของตน และไต้หวันเองก็ยังมิได้รับการยอมรับจากนานาชาติ"
-            "ว่าเป็นประเทศเอกราชมาจนบัดนี้\n"
-            "ในช่วงทศวรรษ 1980 ถึงต้นทศวรรษ 1990 การเมืองการปกครอง"
-            "สาธารณรัฐจีน (ไต้หวัน) เจริญรุ่งเรืองจนเป็นประชาธิปไตยที่มีพรรค"
-            "การเมืองหลายพรรคและมีการเลือกตั้งทั่วหน้า ในช่วงกลางศตวรรษที่ "
-            "20 เศรษฐกิจไต้หวันงอกงามอย่างรวดเร็ว ไต้หวันจึงกลายเป็นประเทศ"
-            "พัฒนาแล้ว ได้ชื่อว่าเป็นหนึ่งในสี่เสือแห่งเอเชีย มีอุตสาหกรรม"
-            "ล้ำหน้า และมีเศรษฐกิจใหญ่โตเป็นอันดับที่ 19 ของโลก[11][12] "
-            "อุตสาหกรรมที่ใช้เทคโนโลยีชั้นสูงของไต้หวันยังมีบทบาทสำคัญมากใน"
-            "เศรษฐกิจโลก เป็นเหตุให้ไต้หวันได้เป็นสมาชิกองค์การการค้าโลกและ"
-            "ความร่วมมือทางเศรษฐกิจเอเชีย-แปซิฟิก เสรีภาพของสื่อมวลชน เสรี"
-            "ภาพทางเศรษฐกิจ การสาธารณสุข[13]การศึกษา และดัชนีการพัฒนามนุษย์ใน"
-            "ไต้หวันยังได้รับการจัดอยู่ในอันดับสูงด้วย[14][4][15]\n"
-            "สาธารณรัฐจีน มีลักษณะเป็นกลุ่มเกาะ ภูมิประเทศติดกับทะเล ไม่ติด"
-            "กับประเทศใดเลย ห่างจากเกาะทางทิศเหนือและทิศตะวันตกเป็นสาธารณรัฐ"
-            "ประชาชนจีน ทิศใต้เป็นประเทศฟิลิปปินส์และทะเลจีนใต้ ส่วนทิศ"
-            "ตะวันออกเป็นมหาสมุทรแปซิฟิก\n"
-            "ในปี ค.ศ. 1638 หลังการพ่ายแพ้ของหลานชายของเจิ้ง เฉิงกง "
-            "จากการบุกโจมตีทางทัพเรือของราชวงศ์ชิงแมนจูที่นำทัพโดยชื่อ หลาง"
-            "จากทางใต้ของมณฑลฝูเจี้ยน ทำให้ราชวงศ์ชิงผนวกยึดเกาะไต้หวันเป็น"
-            "ส่วนหนึ่งสำเร็จ และวางไว้ภายใต้เขตอำนาจของมณฑลฝูเจี้ยน ราชสำนัก"
-            "ราชวงศ์ชิงพยายามลดการละเมิดสิทธิ์และความไม่ลงรอยกันในพื้นที่โดย"
-            "ออกกฎหมายเพื่อจัดการตรวจคนเข้าเมืองและเคารพสิทธิในที่ดินของชน"
-            "พื้นเมืองไต้หวัน ผู้อพยพจากฝูเจี้ยนทางใต้ส่วนใหญ่ยังคงเดินทางไป"
-            "ไต้หวัน เขตแดนระหว่างดินแดนที่เสียภาษีและสิ่งที่ถูกพิจารณาว่า"
-            'เป็นดินแดน "เขตอันตราย" เปลี่ยนไปทางทิศตะวันออกโดยชาวพื้นเมือง'
-            "บางคนเข้ารีตรับวัฒนธรรมแบบจีน ในขณะที่คนอื่นถอยกลับเข้าในภูเขา "
-            "ในช่วงเวลานี้มีความขัดแย้งจำนวนมากระหว่างกลุ่มชาวฮั่นด้วยกันเอง"
-            "จากภูมิภาคต่าง ๆ ของฝูเจี้ยนทางใต้โดยเฉพาะอย่างยิ่งระหว่างเฉวียน"
-            "โจวกับฉางโจว และระหว่างฝูเจี้ยนตอนใต้และชาวพื้นเมืองไต้หวัน\n"
-            "พ.ศ. 2454 (ค.ศ. 1911) การจลาจลอู่ฮั่นในประเทศจีน เป็นจุดเริ่มต้น"
-            "การล่มสลายของราชวงศ์ชิง เมื่อพรรคคอมมิวนิสต์จีนเข้ามีอำนาจในจีน"
-            "แผ่นดินใหญ่เมื่อ พ.ศ. 2492 (1949) พรรคก๊กมินตั๋ง พรรคการเมือง"
-            "ชาตินิยมของจีนที่เป็นฝ่ายแพ้ก็พาผู้คนอพยพหนีออกจากแผ่นดินใหญ่มา"
-            "ตั้งหลักที่ไต้หวัน เพื่อวางแผนกลับไปครองอำนาจในจีนต่อไป\n"
-            "ชาวจีนมากกว่า 1 ล้าน 5 แสนคน อพยพตามมาอยู่ที่เกาะไต้หวันในยุคที่"
-            "เหมา เจ๋อตง มีอำนาจเต็มที่ในจีนแผ่นดินใหญ่ ผู้นำของประเทศทั้งสอง"
-            "จีนคือผู้นำพรรคคอมมิวนิสต์กับผู้นำสาธารณรัฐจีนบนเกาะไต้หวัน แย่ง"
-            "กันเป็นกระบอกเสียงของประชาชนจีนในเวทีโลก แต่เสียงของนานาประเทศ"
-            "ส่วนใหญ่เกรงอิทธิพลของจีนแผ่นดินใหญ่ จึงให้การยอมรับจีนแผ่นดิน"
-            "ใหญ่มากกว่า\n"
-            "ในปี พ.ศ. 2514 (ค.ศ. 1971) ก่อนที่นายพล เจียง ไคเช็ก"
-            "(ภาษาจีน: 蔣中正) จะถึงอสัญกรรมไม่กี่ปี สาธารณรัฐจีนซึ่งเป็น"
-            "ประเทศที่ร่วมก่อตั้งองค์การสหประชาชาติได้สูญเสียสมาชิกภาพใน"
-            "ฐานะตัวแทนชาวจีนให้กับสาธารณรัฐประชาชนจีน ในปี พ.ศ. 2521 (1978)"
-            "สหประชาชาติประกาศรับรองจีนเดียวคือจีนแผ่นดินใหญ่และตัดสัมพันธ์"
-            "ทางการเมืองกับสาธารณรัฐจีน ทั้งสหรัฐอเมริกาก็ได้ถอนการรับรองว่า"
-            "สาธารณรัฐจีนมีฐานะเป็นรัฐ ไต้หวันจึงกลายเป็นเพียงดินแดนที่จีน"
-            "อ้างว่าเป็นส่วนหนึ่งของสาธารณรัฐประชาชนจีนตั้งแต่นั้นเป็นต้นมา\n"
-            "เมื่อเจียง ไคเช็ก ถึงแก่อสัญกรรมในปี พ.ศ. 2518 (1975) ลูกชาย"
-            "ที่ชื่อ เจี่ยง จิงกั๋ว ได้เป็นผู้สืบทอดการปกครอง"
-            "ไต้หวันต่อและเริ่มกระบวนการ วางรากฐานไปสู่ประชาธิปไตย\n"
-            "หลังจากที่ประธานาธิบดี เจียง จิงกั๋ว เสียชีวิต ไต้หวันจึงได้เข้า"
-            "สู่ระบอบประชาธิปไตยเต็มรูปแบบ ประธานาธิบดีคนใหม่ ซึ่งเกิดใน"
-            "ไต้หวัน ชื่อ หลี่ เติงฮุย ขึ้นบริหารประเทศ โดยการสนับสนุนของ"
-            "เจี่ยง จิงกั๋ว ทั้งที่ หลี่ เติงฮุย นั้นเคลื่อนไหว"
-            "สนับสนุนเอกราชไต้หวัน นาย รัฐบาลจีนที่ปักกิ่งได้ตั้ง"
-            'ฉายาประธานาธิบดีไต้หวันคนใหม่ว่า "จิ้งจกปากหวาน" '
-            "ช่วงเวลาที่นายหลี่ เติงฮุย เป็นประธานาธิบดี การเมืองของไต้หวัน"
-            "เกิดการแตกแยกออกเป็น 3 ฝ่ายคือ 1) พวกก๊กมินตั๋ง ที่ต้องการกลับ"
-            "ไปรวมประเทศกับจีนแผ่นดินใหญ่ (รวมจีนแผ่นดินใหญ่ภายใต้การปกครอง"
-            "ของสาธารณรัฐจีน) 2) พวกที่ต้องการให้ไต้หวันเป็นประเทศอิสระไม่"
-            "เกี่ยวข้องกับจีนแผ่นดินใหญ่ และ 3) พวกที่ต้องการดำรงฐานะของ"
-            "ประเทศไว้ดังเดิมต่อไป\n"
-            "ไต้หวันกับจีนแผ่นดินใหญ่นัดเจรจาหาทางออกของข้อขัดแย้งทางการเมือง"
-            "ครั้งแรกที่สิงคโปร์เมื่อปี พ.ศ. 2536 (ค.ศ. 1993) แต่ปรากฏว่าจีน"
-            "แผ่นดินใหญ่ประวิงเวลาลงนามในสัญญาหลายฉบับที่เป็นข้อตกลงร่วมกัน "
-            "ทำให้ผลการเจรจาคราวนั้นไม่ก้าวหน้าไปถึงไหน ความสัมพันธ์ระหว่าง"
-            "สองจีนเลวร้ายลงทุกที เมื่อประธานาธิบดี หลี่ เติงฮุย เดินทางไป"
-            "เยือนสหรัฐอเมริกาและได้รับการยอมรับอย่างเอิกเกริก ทำให้จีนแผ่น"
-            "ดินใหญ่ไม่พอใจอย่างมาก จึงข่มขวัญไต้หวันกับประเทศที่ให้การสนับ"
-            "สนุนไต้หวัน ด้วยการทำการซ้อมรบขึ้นใกล้ ๆ เกาะไต้หวัน สหรัฐ"
-            "อเมริกาออกมาแสดงอาการปกป้องคุ้มครองไต้หวันด้วยการส่งกำลังกอง"
-            "เรือรบของสหรัฐฯ มาป้วนเปี้ยนอยู่ในน่านน้ำที่จีนซ้อมรบ\n"
-            "ขณะที่โลกกำลังล่อแหลมกับสถานการณ์ที่ตึงเครียดในน่านน้ำจีนมาก"
-            "ขึ้นทุกทีนั้น ไต้หวันก็จัดให้มีการเลือกตั้งครั้งใหม่ และในการ"
-            "เลือกตั้งครั้งใหม่นั้นเอง ไต้หวันก็ได้นายหลี่ เติงฮุย เป็น"
-            "ประธานาธิบดีอีกครั้ง\n"
-            "ไต้หวันเข้าสู่สภาวะวิกฤต เมื่อเกิดแผ่นดินไหวครั้งร้ายแรงที่สุดใน"
-            "ประวัติศาสตร์ในเดือนกันยายน พ.ศ. 2542 (ค.ศ. 1999) ทำให้ประชากร"
-            "ส่วนมากที่เป็นชาวพื้นเมืองเสียชีวิตไป 2,000 คน ทั้งเมืองมีแต่"
-            "เศษซากปรักหักพังจากภัยธรรมชาติ และช่วงนี้ไต้หวันต้องเผชิญความ"
-            "ยากลำบาก จีนแผ่นดินใหญ่ก็เพิ่มความกดดันไม่ให้นานาชาติ"
-            "เข้ามายุ่งเกี่ยวกับไต้หวันแม้ในยามคับขันเช่นนี้ โดยประกาศว่า "
-            "หากมีประเทศใดจะเข้าไปให้ความช่วยเหลือไต้หวัน จะต้องได้รับอนุญาต"
-            "จากจีนก่อน ซึ่งคำประกาศของจีนแผ่นดินใหญ่สวนทางกับเมตตาธรรมของ"
-            "ประเทศทั่วโลกที่ต้องการให้ความช่วยเหลือไต้หวัน\n"
-            "เดือนมีนาคม พ.ศ. 2543 (ค.ศ. 2000) มีการเลือกตั้งใหม่ในไต้หวัน "
-            "ชาวไต้หวันเลือกผู้แทนจากพรรคประชาธิปไตยก้าวหน้า คือ นายเฉิน สุย"
-            "เปี่ยน เป็นประธานาธิบดีคนใหม่ของไต้หวัน ผู้ประกาศนโยบายการเมือง"
-            "แข็งกร้าวว่าไต้หวันต้องการแยกตัวเป็นอิสระจากจีนแผ่นดินใหญ่ ยุติ"
-            "ยุคของพรรคชาตินิยมที่ยังฝักใฝ่แผ่นดินใหญ่อยู่ จีนแผ่นดินใหญ่จึง"
-            "ถือว่าเป็นกบฏต่อการปกครองของจีน เพราะแต่ไหนแต่ไร ไต้หวันไม่เคย"
-            "ประกาศอย่างเป็นทางการว่าเป็นประเทศอิสระแยกจากจีน และจีนพูดอยู่"
-            "เสมอว่าไต้หวันเป็นเด็กในปกครองที่ค่อนข้างจะหัวดื้อและเกเร หาก"
-            "ไต้หวันประกาศว่าเป็นอิสระจากจีนเมื่อใด จีนก็จะยกกำลังจัดการ"
-            "กับไต้หวันทันที\n"
-            "ในขณะที่ความสัมพันธ์ทางการเมืองระหว่างสองจีนในสายตาชาวโลก"
-            "เลวร้ายลง จีนทั้งสองกลับมีการติดต่อทางการค้ากันมากขึ้น มีการ"
-            "ผ่อนปรนอนุญาตให้ชาวไต้หวันเดินทางไปจีนแผ่นดินใหญ่เพื่อเยี่ยม"
-            "ญาติได้ เกิดปรากฏการณ์สำคัญคือนักธุรกิจไต้หวันหอบเงินทุนกว่า "
-            "20,000 ล้านดอลลาร์สหรัฐ ไปลงทุนดำเนินธุรกิจทางตอนใต้ของจีน"
-            "แผ่นดินใหญ่ จนกระทั่งขณะนี้ชาวไต้หวันกลายเป็นนักลงทุนรายใหญ่"
-            "เป็นลำดับ 2 ของจีน\n"
-            "วันที่ 24 พฤษภาคม 2560 ศาลรัฐธรรมนูญวินิจฉัยว่ากฎหมายสมรส"
-            "ปัจจุบันในเวลานั้น ละเมิดรัฐธรรมนูญ โดยปฏิเสธสิทธิสมรสของคู่รัก"
-            "เพศเดียวกันชาวไต้หวัน ศาลวินิจฉัยว่าหากสภานิติบัญญัติไม่ผ่าน"
-            "การแก้ไขกฎหมายที่เพียงพอต่อกฎหมายสมรสของไต้หวันภายในสองปี "
-            "การสมรสเพศเดียวกันจะชอบด้วยกฎหมายโดยอัตโนมัติในไต้หวัน[17] "
-            "วันที่ 17 พฤษภาคม 2562 สภานิติบัญญัติไต้หวันอนุมัติ"
-            "ร่างกฎหมายทำให้การสมรสเพศเดียวกันชอบด้วยกฎหมาย"
-            " ทำให้เป็นประเทศแรกในทวีปเอเชียที่ผ่านกฎหมายดังกล่าว[18][19]"
-        )
+DANGER_TEXT_2 = (
+    "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้าน"
+    "หน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้าน"
+)
 
-        self.danger_text1 = (
-            "ชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิ"
-            "ชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิชิ"
-            "ชิชิชิชิชิชิชิชิชิ"
-        )
+DANGER_TEXT_3 = (
+    "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
+    "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
+    "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
+    "ด้านหน้าด้านหน้าด้านกกกกกก"
+    "กกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกก"
+)
 
-        self.danger_text2 = (
-            "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้าน"
-            "หน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้าน"
-        )
+SENT_1 = "ฉันไปโรงเรียน เธอไปโรงพยาบาล"
+SENT_1_TOKS = ["ฉันไปโรงเรียน ", "เธอไปโรงพยาบาล"]
+SENT_2 = "วันนี้ฉันกินข้าว และโดดเรียน"
+SENT_2_TOKS = ["วันนี้ฉันกินข้าว และโดดเรียน"]
+SENT_3 = (
+    "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา"
+    + "จากผลงานวิจัยที่เคยทำมาในอดีต"
+    + " มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด"
+    + " จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้"
+)
+SENT_3_TOKS = [
+    "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา" + "จากผลงานวิจัยที่เคยทำมาในอดีต ",
+    "มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด ",
+    "จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้",
+]
+SENT_4 = ["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]
 
-        self.danger_text3 = (
-            "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
-            "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
-            "ด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้าด้านหน้า"
-            "ด้านหน้าด้านหน้าด้านกกกกกก"
-            "กกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกกก"
-        )
 
+class TokenizeTestCase(unittest.TestCase):
     def test_Tokenizer(self):
         _tokenizer = Tokenizer(DEFAULT_WORD_DICT_TRIE)
         self.assertEqual(_tokenizer.word_tokenize(""), [])
@@ -208,11 +214,7 @@ def test_Tokenizer(self):
         _tokenizer = Tokenizer()
         self.assertEqual(_tokenizer.word_tokenize("ก"), ["ก"])
         with self.assertRaises(NotImplementedError):
-            Tokenizer(engine="catcut")
-
-    def test_clause_tokenize(self):
-        self.assertIsNotNone(sent_clause_tokenize(["ฉัน", "ทดสอบ"]))
-        self.assertIsInstance(sent_clause_tokenize(["ฉัน", "ทดสอบ"]), list)
+            Tokenizer(engine="catcut888")
 
     def test_sent_tokenize(self):
         self.assertEqual(sent_tokenize(None), [])
@@ -225,135 +227,33 @@ def test_sent_tokenize(self):
             sent_tokenize("รักน้ำ  รักปลา  ", engine="whitespace+newline"),
             ["รักน้ำ", "รักปลา"],
         )
-
-        sent_1 = "ฉันไปโรงเรียน เธอไปโรงพยาบาล"
-        sent_1_toks = ["ฉันไปโรงเรียน ", "เธอไปโรงพยาบาล"]
-        sent_2 = "วันนี้ฉันกินข้าว และโดดเรียน"
-        sent_2_toks = ["วันนี้ฉันกินข้าว และโดดเรียน"]
-        sent_3 = (
-            "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา"
-            + "จากผลงานวิจัยที่เคยทำมาในอดีต"
-            + " มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด"
-            + " จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้"
-        )
-        sent_3_toks = [
-            "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา" + "จากผลงานวิจัยที่เคยทำมาในอดีต ",
-            "มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด ",
-            "จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้",
-        ]
-
-        self.assertEqual(
-            sent_tokenize(sent_1, engine="crfcut"),
-            sent_1_toks,
-        )
-        self.assertEqual(
-            sent_tokenize(sent_2, engine="crfcut"),
-            sent_2_toks,
-        )
-        self.assertEqual(
-            sent_tokenize(sent_3, engine="crfcut"),
-            sent_3_toks,
-        )
         self.assertEqual(
-            sent_tokenize(sent_1),
-            sent_1_toks,
+            sent_tokenize(SENT_1),
+            SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(sent_2),
-            sent_2_toks,
+            sent_tokenize(SENT_2),
+            SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(sent_3),
-            sent_3_toks,
+            sent_tokenize(SENT_3),
+            SENT_3_TOKS,
         )
         self.assertIsNotNone(
             sent_tokenize(
-                sent_1,
+                SENT_1,
                 keep_whitespace=False,
                 engine="whitespace",
             ),
         )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_1,
-                engine="tltk",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_2,
-                engine="tltk",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_3,
-                engine="tltk",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_1,
-                engine="thaisum",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_2,
-                engine="thaisum",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_3,
-                engine="thaisum",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_3,
-                engine="wtp",
-            ),
-        )
-        self.assertIsNotNone(
-            sent_tokenize(
-                sent_3,
-                engine="wtp-tiny",
-            ),
-        )
-        # self.assertIsNotNone(
-        #     sent_tokenize(
-        #         sent_3,
-        #         engine="wtp-base",
-        #     ),
-        # )
-        # self.assertIsNotNone(
-        #     sent_tokenize(
-        #         sent_3,
-        #         engine="wtp-large",
-        #     ),
-        # )
-        sent_4 = ["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]
         self.assertEqual(
-            sent_tokenize(sent_4, engine="crfcut"),
-            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
-        )
-        self.assertEqual(
-            sent_tokenize(sent_4, engine="whitespace"),
+            sent_tokenize(SENT_4, engine="whitespace"),
             [["ผม", "กิน", "ข้าว"], ["\n", "เธอ", "เล่น", "เกม"]],
         )
-        self.assertEqual(
-            sent_tokenize(sent_4, engine="whitespace+newline"),
-            [["ผม", "กิน", "ข้าว"], ["เธอ", "เล่น", "เกม"]],
-        )
-        self.assertEqual(
-            sent_tokenize(sent_4, engine="thaisum"),
-            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
-        )
         self.assertFalse(
             " "
             in sent_tokenize(
-                sent_1,
+                SENT_1,
                 engine="whitespace",
                 keep_whitespace=False,
             )
@@ -361,17 +261,6 @@ def test_sent_tokenize(self):
         with self.assertRaises(ValueError):
             sent_tokenize("ฉันไป กิน", engine="XX")  # engine does not exist
 
-    def test_paragraph_tokenize(self):
-        sent = (
-            "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา"
-            + "จากผลงานวิจัยที่เคยทำมาในอดีต"
-            + " มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด"
-            + " จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้"
-        )
-        self.assertIsNotNone(paragraph_tokenize(sent))
-        with self.assertRaises(ValueError):
-            paragraph_tokenize(sent, engine="ai2+2thai")
-
     def test_subword_tokenize(self):
         self.assertEqual(subword_tokenize(None), [])
         self.assertEqual(subword_tokenize(""), [])
@@ -394,17 +283,6 @@ def test_subword_tokenize(self):
             "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="etcc")
         )
         self.assertIsInstance(subword_tokenize("โควิด19", engine="etcc"), list)
-        self.assertEqual(subword_tokenize(None, engine="wangchanberta"), [])
-        self.assertEqual(subword_tokenize("", engine="wangchanberta"), [])
-        self.assertIsInstance(
-            subword_tokenize("สวัสดิีดาวอังคาร", engine="wangchanberta"), list
-        )
-        self.assertFalse(
-            "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="wangchanberta")
-        )
-        self.assertIsInstance(
-            subword_tokenize("โควิด19", engine="wangchanberta"), list
-        )
         self.assertFalse(
             " " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
         )
@@ -413,49 +291,9 @@ def test_subword_tokenize(self):
             ["สวัส", "ดี", "ชาว", "โลก"],
         )
         self.assertFalse("า" in subword_tokenize("สวัสดีชาวโลก", engine="dict"))
-        self.assertEqual(subword_tokenize(None, engine="ssg"), [])
-        self.assertEqual(subword_tokenize(None, engine="han_solo"), [])
-        self.assertEqual(
-            subword_tokenize("แมวกินปลา", engine="ssg"), ["แมว", "กิน", "ปลา"]
-        )
-        self.assertTrue(
-            "ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
-        )
-        self.assertFalse("า" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg"))
-        self.assertEqual(
-            subword_tokenize("แมวกินปลา", engine="han_solo"),
-            ["แมว", "กิน", "ปลา"],
-        )
-        self.assertTrue(
-            "ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="han_solo")
-        )
-        self.assertFalse(
-            "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="han_solo")
-        )
         self.assertFalse(
             " " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
         )
-        self.assertEqual(subword_tokenize(None, engine="tltk"), [])
-        self.assertEqual(subword_tokenize("", engine="tltk"), [])
-        self.assertIsInstance(
-            subword_tokenize("สวัสดิีดาวอังคาร", engine="tltk"), list
-        )
-        self.assertFalse(
-            "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="tltk")
-        )
-        self.assertIsInstance(subword_tokenize("โควิด19", engine="tltk"), list)
-
-        self.assertEqual(subword_tokenize(None, engine="phayathai"), [])
-        self.assertEqual(subword_tokenize("", engine="phayathai"), [])
-        self.assertIsInstance(
-            subword_tokenize("สวัสดิีดาวอังคาร", engine="phayathai"), list
-        )
-        self.assertFalse(
-            "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="phayathai")
-        )
-        self.assertIsInstance(
-            subword_tokenize("โควิด19", engine="phayathai"), list
-        )
         with self.assertRaises(ValueError):
             subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
 
@@ -470,17 +308,9 @@ def test_word_tokenize(self):
             word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
             ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
         )
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="nlpo3"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="attacut"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="deepcut"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="icu"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="longest"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="mm"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="nercut"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="newmm"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="sefr_cut"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="tltk"))
-        self.assertIsNotNone(word_tokenize(self.text_1, engine="oskut"))
+        self.assertIsNotNone(word_tokenize(TEXT_1, engine="longest"))
+        self.assertIsNotNone(word_tokenize(TEXT_1, engine="mm"))
+        self.assertIsNotNone(word_tokenize(TEXT_1, engine="newmm"))
 
         with self.assertRaises(ValueError):
             word_tokenize("หมอนทอง", engine="XX")  # engine does not exist
@@ -489,33 +319,6 @@ def test_word_tokenize(self):
             "ไฟ" in word_tokenize("รถไฟฟ้า", custom_dict=dict_trie(["ไฟ"]))
         )
 
-    def test_attacut(self):
-        self.assertEqual(attacut.segment(None), [])
-        self.assertEqual(attacut.segment(""), [])
-        self.assertEqual(
-            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="attacut"),
-            ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
-        )
-        self.assertEqual(
-            attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-sc"),
-            ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
-        )
-        self.assertIsNotNone(
-            attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-c")
-        )
-
-    def test_deepcut(self):
-        self.assertEqual(deepcut.segment(None), [])
-        self.assertEqual(deepcut.segment(""), [])
-        self.assertIsNotNone(deepcut.segment("ทดสอบ", DEFAULT_WORD_DICT_TRIE))
-        self.assertIsNotNone(deepcut.segment("ทดสอบ", ["ทด", "สอบ"]))
-        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="deepcut"))
-        self.assertIsNotNone(
-            word_tokenize(
-                "ทดสอบ", engine="deepcut", custom_dict=DEFAULT_WORD_DICT_TRIE
-            )
-        )
-
     def test_etcc(self):
         self.assertEqual(etcc.segment(None), [])
         self.assertEqual(etcc.segment(""), [])
@@ -532,35 +335,6 @@ def test_etcc(self):
             )
         )
 
-    def test_icu(self):
-        self.assertEqual(pyicu.segment(None), [])
-        self.assertEqual(pyicu.segment(""), [])
-        self.assertEqual(
-            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="icu"),
-            ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
-        )
-
-    def test_tltk(self):
-        self.assertEqual(tltk.segment(None), [])
-        self.assertEqual(tltk.segment(""), [])
-        self.assertEqual(
-            tltk.syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
-            [
-                "ฉัน",
-                "รัก",
-                "ภา",
-                "ษา",
-                "ไทย",
-                "เพราะ",
-                "ฉัน",
-                "เป็น",
-                "คน",
-                "ไทย",
-            ],
-        )
-        self.assertEqual(tltk.syllable_tokenize(None), [])
-        self.assertEqual(tltk.syllable_tokenize(""), [])
-
     def test_longest(self):
         self.assertEqual(longest.segment(None), [])
         self.assertEqual(longest.segment(""), [])
@@ -682,48 +456,29 @@ def test_newmm(self):
         )
 
     def test_newmm_longtext(self):
+        self.assertIsInstance(word_tokenize(LONG_TEXT, engine="newmm"), list)
         self.assertIsInstance(
-            word_tokenize(self.long_text, engine="newmm"), list
-        )
-        self.assertIsInstance(
-            word_tokenize(self.long_text, engine="newmm-safe"), list
+            word_tokenize(LONG_TEXT, engine="newmm-safe"), list
         )
 
     def test_newmm_dangertext(self):
         self.assertIsInstance(
-            word_tokenize(self.danger_text1, engine="newmm"), list
+            word_tokenize(DANGER_TEXT_1, engine="newmm"), list
         )
         self.assertIsInstance(
-            word_tokenize(self.danger_text2, engine="newmm"), list
+            word_tokenize(DANGER_TEXT_2, engine="newmm"), list
         )
         self.assertIsInstance(
-            word_tokenize(self.danger_text3, engine="newmm"), list
+            word_tokenize(DANGER_TEXT_3, engine="newmm"), list
         )
         self.assertIsInstance(
-            word_tokenize(self.danger_text1, engine="newmm-safe"), list
+            word_tokenize(DANGER_TEXT_1, engine="newmm-safe"), list
         )
         self.assertIsInstance(
-            word_tokenize(self.danger_text2, engine="newmm-safe"), list
+            word_tokenize(DANGER_TEXT_2, engine="newmm-safe"), list
         )
         self.assertIsInstance(
-            word_tokenize(self.danger_text3, engine="newmm-safe"), list
-        )
-
-    def test_nercut(self):
-        self.assertEqual(nercut.segment(None), [])
-        self.assertEqual(nercut.segment(""), [])
-        self.assertIsNotNone(nercut.segment("ทดสอบ"))
-        self.assertEqual(nercut.segment("ทันแน่ๆ"), ["ทัน", "แน่ๆ"])
-        self.assertEqual(nercut.segment("%1ครั้ง"), ["%", "1", "ครั้ง"])
-        self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ["ทุ๊กกโคนน"])
-        self.assertIsNotNone(nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"))
-        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut"))
-
-    def test_ssg(self):
-        self.assertEqual(ssg.segment(None), [])
-        self.assertEqual(ssg.segment(""), [])
-        self.assertTrue(
-            "ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
+            word_tokenize(DANGER_TEXT_3, engine="newmm-safe"), list
         )
 
     def test_tcc(self):
@@ -814,26 +569,6 @@ def test_tcc_p(self):
         self.assertEqual(list(tcc_p.tcc("")), [])
         self.assertEqual(tcc_p.tcc_pos(""), set())
 
-    def test_sefr_cut(self):
-        self.assertEqual(sefr_cut.segment(None), [])
-        self.assertEqual(sefr_cut.segment(""), [])
-        self.assertIsNotNone(
-            sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
-        )
-        self.assertIsNotNone(
-            sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tnhc"),
-        )
-
-    def test_oskut(self):
-        self.assertEqual(oskut.segment(None), [])
-        self.assertEqual(oskut.segment(""), [])
-        self.assertIsNotNone(
-            oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
-        )
-        self.assertIsNotNone(
-            oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="scads"),
-        )
-
     def test_word_detokenize(self):
         self.assertEqual(
             word_detokenize(["ผม", "เลี้ยง", "5", "ตัว"]), "ผมเลี้ยง 5 ตัว"
@@ -857,49 +592,3 @@ def test_word_detokenize(self):
             word_detokenize(["ม่ายย", " ", "ผม", "เลี้ยง", "5", "ตัว"]),
             "ม่ายย ผมเลี้ยง 5 ตัว",
         )
-
-    def test_numeric_data_format(self):
-        engines = ["attacut", "deepcut", "newmm", "sefr_cut"]
-
-        for engine in engines:
-            self.assertIn(
-                "127.0.0.1",
-                word_tokenize("ไอพีของคุณคือ 127.0.0.1 ครับ", engine=engine),
-            )
-
-            tokens = word_tokenize(
-                "เวลา 12:12pm มีโปรโมชั่น 11.11", engine=engine
-            )
-            self.assertTrue(
-                any(value in tokens for value in ["12:12pm", "12:12"]),
-                msg=f"{engine}: {tokens}",
-            )
-            self.assertIn("11.11", tokens)
-
-            self.assertIn(
-                "1,234,567.89",
-                word_tokenize("รางวัลมูลค่า 1,234,567.89 บาท", engine=engine),
-            )
-
-            tokens = word_tokenize("อัตราส่วน 2.5:1 คือ 5:2", engine=engine)
-            self.assertIn("2.5:1", tokens)
-            self.assertIn("5:2", tokens)
-
-        # try turning off `join_broken_num`
-        engine = "attacut"
-        self.assertNotIn(
-            "127.0.0.1",
-            word_tokenize(
-                "ไอพีของคุณคือ 127.0.0.1 ครับ",
-                engine=engine,
-                join_broken_num=False,
-            ),
-        )
-        self.assertNotIn(
-            "1,234,567.89",
-            word_tokenize(
-                "รางวัลมูลค่า 1,234,567.89 บาท",
-                engine=engine,
-                join_broken_num=False,
-            ),
-        )
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 048da92d4..f8f74705a 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -11,7 +11,7 @@
 )
 
 
-class TestToolsPackage(unittest.TestCase):
+class ToolsTestCase(unittest.TestCase):
     def test_path(self):
         data_filename = "ttc_freq.txt"
         self.assertTrue(
diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py
index 77c4c5eda..c9d8251a9 100644
--- a/tests/test_transliterate.py
+++ b/tests/test_transliterate.py
@@ -2,16 +2,9 @@
 
 import unittest
 
-import torch
+from pythainlp.transliterate import romanize, transliterate
 
-from pythainlp.corpus import remove
-from pythainlp.transliterate import pronunciate, puan, romanize, transliterate
-from pythainlp.transliterate.ipa import trans_list, xsampa_list
-from pythainlp.transliterate.thai2rom import ThaiTransliterator
-from pythainlp.transliterate.thai2rom_onnx import ThaiTransliterator_ONNX
-from pythainlp.transliterate.wunsen import WunsenTransliterate
-
-_BASIC_TESTS = {
+BASIC_TESTS = {
     None: "",
     "": "",
     "abc": "abc",
@@ -46,7 +39,7 @@
 # these are set of two-syllable words,
 # to test if the transliteration/romanization is consistent, say
 # romanize(1+2) = romanize(1) + romanize(2)
-_CONSISTENCY_TESTS = [
+CONSISTENCY_TESTS = [
     # ("กระจก", "กระ", "จก"),  # failed
     # ("ระเบิด", "ระ", "เบิด"),  # failed
     # ("หยากไย่", "หยาก", "ไย่"),  # failed
@@ -55,19 +48,18 @@
 ]
 
 
-class TestTransliteratePackage(unittest.TestCase):
+class TransliterateTestCase(unittest.TestCase):
     def test_romanize(self):
         self.assertEqual(romanize(None), "")
         self.assertEqual(romanize(""), "")
         self.assertEqual(romanize("แมว"), "maeo")
-        self.assertEqual(romanize("แมว", engine="tltk"), "maeo")
 
     def test_romanize_royin_basic(self):
-        for word, expect in _BASIC_TESTS.items():
+        for word, expect in BASIC_TESTS.items():
             self.assertEqual(romanize(word, engine="royin"), expect)
 
     def test_romanize_royin_consistency(self):
-        for word, part1, part2 in _CONSISTENCY_TESTS:
+        for word, part1, part2 in CONSISTENCY_TESTS:
             self.assertEqual(
                 romanize(word, engine="royin"),
                 (
@@ -76,34 +68,6 @@ def test_romanize_royin_consistency(self):
                 ),
             )
 
-    def test_romanize_thai2rom(self):
-        self.assertEqual(romanize("แมว", engine="thai2rom"), "maeo")
-        self.assertEqual(romanize("บ้านไร่", engine="thai2rom"), "banrai")
-        self.assertEqual(romanize("สุนัข", engine="thai2rom"), "sunak")
-        self.assertEqual(romanize("นก", engine="thai2rom"), "nok")
-        self.assertEqual(romanize("ความอิ่ม", engine="thai2rom"), "khwam-im")
-        self.assertEqual(
-            romanize("กานต์ ณรงค์", engine="thai2rom"), "kan narong"
-        )
-        self.assertEqual(romanize("สกุนต์", engine="thai2rom"), "sakun")
-        self.assertEqual(romanize("ชารินทร์", engine="thai2rom"), "charin")
-
-    def test_romanize_thai2rom_onnx(self):
-        self.assertEqual(romanize("แมว", engine="thai2rom_onnx"), "maeo")
-        self.assertEqual(romanize("บ้านไร่", engine="thai2rom_onnx"), "banrai")
-        self.assertEqual(romanize("สุนัข", engine="thai2rom_onnx"), "sunak")
-        self.assertEqual(romanize("นก", engine="thai2rom_onnx"), "nok")
-        self.assertEqual(
-            romanize("ความอิ่ม", engine="thai2rom_onnx"), "khwam-im"
-        )
-        self.assertEqual(
-            romanize("กานต์ ณรงค์", engine="thai2rom_onnx"), "kan narong"
-        )
-        self.assertEqual(romanize("สกุนต์", engine="thai2rom_onnx"), "sakun")
-        self.assertEqual(
-            romanize("ชารินทร์", engine="thai2rom_onnx"), "charin"
-        )
-
     def test_romanize_lookup(self):
         # found in v1.4
         self.assertEqual(romanize("บอล", engine="lookup"), "ball")
@@ -120,113 +84,13 @@ def test_romanize_lookup(self):
             romanize("คาราเมลคาปูชิโน่", engine="lookup"), "khanamenkhapuchino"
         )
         # not found in v1.4
-        ## default fallback
         self.assertEqual(romanize("ภาพยนตร์", engine="lookup"), "phapn")
         self.assertEqual(romanize("แมว", engine="lookup"), "maeo")
-        ## fallback = 'thai2rom'
-        self.assertEqual(
-            romanize("ความอิ่ม", engine="lookup", fallback_engine="thai2rom"),
-            "khwam-im",
-        )
-        self.assertEqual(
-            romanize("สามารถ", engine="lookup", fallback_engine="thai2rom"),
-            "samat",
-        )
-
-    def test_thai2rom_prepare_sequence(self):
-        transliterater = ThaiTransliterator()
-
-        UNK_TOKEN = 1  # UNK_TOKEN or <UNK> is represented by 1
-        END_TOKEN = 3  # END_TOKEN or <end> is represented by 3
-
-        self.assertListEqual(
-            transliterater._prepare_sequence_in("A")
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
-
-        self.assertListEqual(
-            transliterater._prepare_sequence_in("♥")
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
-
-        self.assertNotEqual(
-            transliterater._prepare_sequence_in("ก")
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
-
-    def test_thai2rom_onnx_prepare_sequence(self):
-        transliterater = ThaiTransliterator_ONNX()
-
-        UNK_TOKEN = 1  # UNK_TOKEN or <UNK> is represented by 1
-        END_TOKEN = 3  # END_TOKEN or <end> is represented by 3
-
-        self.assertListEqual(
-            transliterater._prepare_sequence_in("A").tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
-
-        self.assertListEqual(
-            transliterater._prepare_sequence_in("♥").tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
-
-        self.assertNotEqual(
-            transliterater._prepare_sequence_in("ก").tolist(),
-            torch.tensor([UNK_TOKEN, END_TOKEN], dtype=torch.long)
-            .cpu()
-            .detach()
-            .numpy()
-            .tolist(),
-        )
 
     def test_transliterate(self):
         self.assertEqual(transliterate(""), "")
-        self.assertEqual(transliterate("แมว", "pyicu"), "mæw")
-        self.assertEqual(transliterate("คน", engine="ipa"), "kʰon")
-        self.assertIsNotNone(transliterate("คน", engine="thaig2p"))
-        self.assertIsNotNone(transliterate("แมว", engine="thaig2p"))
-        self.assertIsNotNone(transliterate("คน", engine="thaig2p_v2"))
-        self.assertIsNotNone(transliterate("แมว", engine="thaig2p_v2"))
-        self.assertIsNotNone(transliterate("คน", engine="tltk_g2p"))
-        self.assertIsNotNone(transliterate("แมว", engine="tltk_g2p"))
-        self.assertIsNotNone(transliterate("คน", engine="tltk_ipa"))
-        self.assertIsNotNone(transliterate("แมว", engine="tltk_ipa"))
         self.assertIsNotNone(transliterate("คน", engine="iso_11940"))
         self.assertIsNotNone(transliterate("แมว", engine="iso_11940"))
-        self.assertIsNotNone(trans_list("คน"))
-        self.assertIsNotNone(xsampa_list("คน"))
 
     def test_transliterate_iso11940(self):
         self.assertEqual(
@@ -235,47 +99,3 @@ def test_transliterate_iso11940(self):
         self.assertEqual(
             transliterate("ภาษาไทย", engine="iso_11940"), "p̣hās̛̄āịthy"
         )
-
-    def test_transliterate_wunsen(self):
-        wt = WunsenTransliterate()
-        self.assertEqual(wt.transliterate("ohayō", lang="jp"), "โอฮาโย")
-        self.assertEqual(
-            wt.transliterate(
-                "ohayou", lang="jp", jp_input="Hepburn-no diacritic"
-            ),
-            "โอฮาโย",
-        )
-        self.assertEqual(
-            wt.transliterate("ohayō", lang="jp", system="RI35"), "โอะฮะโย"
-        )
-        self.assertEqual(
-            wt.transliterate("annyeonghaseyo", lang="ko"), "อันนย็องฮาเซโย"
-        )
-        self.assertEqual(wt.transliterate("xin chào", lang="vi"), "ซีน จ่าว")
-        self.assertEqual(wt.transliterate("ni3 hao3", lang="zh"), "หนี เห่า")
-        self.assertEqual(
-            wt.transliterate("ni3 hao3", lang="zh", zh_sandhi=False),
-            "หนี่ เห่า",
-        )
-        self.assertEqual(
-            wt.transliterate("ni3 hao3", lang="zh", system="RI49"), "หนี ห่าว"
-        )
-        with self.assertRaises(NotImplementedError):
-            wt.transliterate("xin chào", lang="vii")
-
-    def test_pronunciate(self):
-        self.assertEqual(pronunciate(""), "")
-        remove("thai_w2p")
-        self.assertIsNotNone(pronunciate("คน", engine="w2p"))
-        self.assertIsNotNone(pronunciate("แมว", engine="w2p"))
-        self.assertIsNotNone(pronunciate("มข.", engine="w2p"))
-        self.assertIsNotNone(pronunciate("มช.", engine="w2p"))
-        self.assertIsNotNone(pronunciate("jks", engine="w2p"))
-
-    def test_puan(self):
-        self.assertEqual(puan("แมว"), "แมว")
-        self.assertEqual(puan("นาริน"), "นิน-รา")
-        self.assertEqual(puan("นาริน", show_pronunciation=False), "นินรา")
-        self.assertEqual(
-            puan("การทำความดี", show_pronunciation=False), "ดานทำความกี"
-        )
diff --git a/tests/test_util.py b/tests/test_util.py
index 2559f7069..1033f6974 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -5,6 +5,7 @@
 """
 Unit tests for pythainlp.util module.
 """
+
 import os
 import unittest
 from collections import Counter
@@ -14,7 +15,6 @@
 from pythainlp.corpus.common import _THAI_WORDS_FILENAME
 from pythainlp.util import (
     Trie,
-    # abbreviation_to_full_text,
     arabic_digit_to_thai_digit,
     bahttext,
     collate,
@@ -44,7 +44,6 @@
     remove_tonemark,
     remove_trailing_repeat_consonants,
     remove_zw,
-    rhyme,
     sound_syllable,
     syllable_length,
     syllable_open_close_detector,
@@ -56,7 +55,6 @@
     thai_strftime,
     thai_strptime,
     thai_to_eng,
-    thai_word_tone_detector,
     thaiword_to_date,
     thaiword_to_num,
     thaiword_to_time,
@@ -67,10 +65,9 @@
     words_to_num,
 )
 from pythainlp.util.morse import morse_decode, morse_encode
-from pythainlp.util.spell_words import spell_word
 
 
-class TestUtilPackage(unittest.TestCase):
+class UtilTestCase(unittest.TestCase):
     # ### pythainlp.util.collate
 
     def test_collate(self):
@@ -541,8 +538,8 @@ def test_normalize(self):
         )
         self.assertEqual(
             maiyamok("เด็กๆๆชอบไปโรงเรียน"),
-            ['เด็ก', 'เด็ก', 'เด็ก', 'ชอบ', 'ไป', 'โรงเรียน'],
-        ) # 914
+            ["เด็ก", "เด็ก", "เด็ก", "ชอบ", "ไป", "โรงเรียน"],
+        )  # 914
         self.assertEqual(
             maiyamok(
                 ["ทำไม", "คน", "ดี", " ", "ๆ", "ๆ", " ", "ถึง", "ทำ", "ไม่ได้"]
@@ -745,12 +742,6 @@ def test_syllable_open_close_detector(self):
     def test_to_idna(self):
         self.assertEqual(to_idna("คนละครึ่ง.com"), "xn--42caj4e6bk1f5b1j.com")
 
-    def test_thai_word_tone_detector(self):
-        self.assertIsNotNone(thai_word_tone_detector("คนดี"))
-        self.assertEqual(
-            thai_word_tone_detector("ราคา"), [("รา", "m"), ("คา", "m")]
-        )
-
     def test_thai_strptime(self):
         self.assertIsNotNone(
             thai_strptime(
@@ -813,18 +804,6 @@ def test_tis620_to_utf8(self):
             tis620_to_utf8("¡ÃÐ·ÃÇ§ÍØµÊÒË¡ÃÃÁ"), "กระทรวงอุตสาหกรรม"
         )
 
-    def test_spell_word(self):
-        self.assertEqual(spell_word("เสือ"), ["สอ", "เอือ", "เสือ"])
-        self.assertEqual(spell_word("เสื้อ"), ["สอ", "เอือ", "ไม้โท", "เสื้อ"])
-        self.assertEqual(spell_word("คน"), ["คอ", "นอ", "คน"])
-        self.assertEqual(
-            spell_word("คนดี"), ["คอ", "นอ", "คน", "ดอ", "อี", "ดี", "คนดี"]
-        )
-
-    def test_rhyme(self):
-        self.assertIsInstance(rhyme("แมว"), list)
-        self.assertTrue(len(rhyme("แมว")) > 2)
-
     def test_remove_repeat_consonants(self):
         # update of pythainlp.copus.thai_words() able to break this
         self.assertEqual(
diff --git a/tests/test_augment.py b/tests/testx_augment.py
similarity index 97%
rename from tests/test_augment.py
rename to tests/testx_augment.py
index 8b97ed2e0..0ae9da27f 100644
--- a/tests/test_augment.py
+++ b/tests/testx_augment.py
@@ -15,7 +15,7 @@
 from pythainlp.augment.wordnet import postype2wordnet
 
 
-class TestTextaugmentPackage(unittest.TestCase):
+class AugmentTestCaseX(unittest.TestCase):
     def setUp(self):
         self.text = "เรารักคุณมากที่สุดในโลก"
         self.text2 = "เราอยู่ที่มหาวิทยาลัยขอนแก่น"
diff --git a/tests/test_coref.py b/tests/testx_coref.py
similarity index 92%
rename from tests/test_coref.py
rename to tests/testx_coref.py
index a2e0d301f..d7597c75e 100644
--- a/tests/test_coref.py
+++ b/tests/testx_coref.py
@@ -7,7 +7,7 @@
 from pythainlp.coref import coreference_resolution
 
 
-class TestParsePackage(unittest.TestCase):
+class CorefTestCaseX(unittest.TestCase):
     def test_coreference_resolution(self):
         pass
         # self.assertIsNotNone(
diff --git a/tests/testx_corpus.py b/tests/testx_corpus.py
new file mode 100644
index 000000000..864c0074e
--- /dev/null
+++ b/tests/testx_corpus.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+import nltk
+from nltk.corpus import wordnet as wn
+
+from pythainlp.corpus import wordnet
+
+
+class CorpusTestCaseX(unittest.TestCase):
+    """Tests for the pythainlp.corpus.wordnet wrapper functions."""
+
+    def test_wordnet(self):
+        """Run language, synset, lemma and similarity lookups."""
+        # Force-download the "omw-1.4" NLTK resource before the lookups.
+        nltk.download("omw-1.4", force=True)
+
+        # languages
+        self.assertIsNotNone(wordnet.langs())
+        self.assertIn("tha", wordnet.langs())
+
+        # synsets
+        spy = wordnet.synset("spy.n.01")
+        self.assertEqual(spy.lemma_names("tha"), ["สปาย", "สายลับ"])
+        self.assertIsNotNone(wordnet.synsets("นก"))
+        self.assertIsNotNone(wordnet.all_synsets(pos=wn.ADJ))
+
+        # lemmas and morphology
+        self.assertIsNotNone(wordnet.lemmas("นก"))
+        self.assertIsNotNone(wordnet.all_lemma_names(pos=wn.ADV))
+        self.assertIsNotNone(wordnet.lemma("cat.n.01.cat"))
+        self.assertEqual(wordnet.morphy("dogs"), "dog")
+
+        # Each module-level similarity function must return the same
+        # value as the method of the same name on the synset.
+        bird = wordnet.synset("bird.n.01")
+        mouse = wordnet.synset("mouse.n.01")
+        for metric in ("path_similarity", "wup_similarity", "lch_similarity"):
+            via_module = getattr(wordnet, metric)(bird, mouse)
+            via_synset = getattr(bird, metric)(mouse)
+            self.assertEqual(via_module, via_synset)
+
+        # Resolve a lemma key taken from the first synset for "แมว".
+        first_cat = wordnet.synsets("แมว")[0]
+        cat_key = first_cat.lemmas()[0].key()
+        self.assertIsNotNone(wordnet.lemma_from_key(cat_key))
diff --git a/tests/test_generate.py b/tests/testx_generate.py
similarity index 97%
rename from tests/test_generate.py
rename to tests/testx_generate.py
index 6e1db1389..58a8aae65 100644
--- a/tests/test_generate.py
+++ b/tests/testx_generate.py
@@ -8,7 +8,7 @@
 from pythainlp.generate.thai2fit import gen_sentence
 
 
-class TestGeneratePackage(unittest.TestCase):
+class GenerateTestCaseX(unittest.TestCase):
     def test_unigram(self):
         _tnc_unigram = Unigram("tnc")
         self.assertIsNotNone(_tnc_unigram.gen_sentence("ผม"))
diff --git a/tests/test_misspell.py b/tests/testx_misspell.py
similarity index 97%
rename from tests/test_misspell.py
rename to tests/testx_misspell.py
index 51b2bac44..dbefc053b 100644
--- a/tests/test_misspell.py
+++ b/tests/testx_misspell.py
@@ -20,7 +20,7 @@ def _count_difference(st1, st2):
     return count
 
 
-class TestTextMisspellPackage(unittest.TestCase):
+class MisspellTestCaseX(unittest.TestCase):
     def setUp(self):
         self.texts = ["เรารักคุณมากที่สุดในโลก", "เราอยู่ที่มหาวิทยาลัยขอนแก่น"]
 
diff --git a/tests/testx_soundex.py b/tests/testx_soundex.py
new file mode 100644
index 000000000..9f04210f9
--- /dev/null
+++ b/tests/testx_soundex.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+from pythainlp.soundex.sound import audio_vector, word_approximation
+
+
+class SoundexTestCaseX(unittest.TestCase):
+    """Smoke tests for the helpers in pythainlp.soundex.sound."""
+
+    def test_word_approximation(self):
+        """word_approximation() yields a result for a word and candidates."""
+        candidates = ["รส", "รด", "คน"]
+        approximation = word_approximation("รถ", candidates)
+        self.assertIsNotNone(approximation)
+
+    def test_audio_vector(self):
+        """audio_vector() yields a result for a single Thai word."""
+        vector = audio_vector("คน")
+        self.assertIsNotNone(vector)
diff --git a/tests/testx_spell.py b/tests/testx_spell.py
new file mode 100644
index 000000000..9c587de12
--- /dev/null
+++ b/tests/testx_spell.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+from pythainlp.spell import (
+    correct,
+    correct_sent,
+    spell,
+    spell_sent,
+    symspellpy,
+)
+
+from .test_spell import SENT_TOKS
+
+
+class SpellTestCaseX(unittest.TestCase):
+    """Spell-checking tests for the engines named in each test."""
+
+    def test_spell(self):
+        """spell() returns a non-empty list for each (word, engine) pair."""
+        cases = (
+            ("เน้ร", "phunspell"),
+            ("เกสมร์", "phunspell"),
+            ("เน้ร", "symspellpy"),
+            ("เกสมร์", "symspellpy"),
+            ("เน้ร", "tltk"),
+            ("เดก", "tltk"),
+        )
+        for word, engine in cases:
+            suggestions = spell(word, engine=engine)
+            self.assertIsInstance(suggestions, list)
+            self.assertGreater(len(suggestions), 0)
+
+    def test_word_correct(self):
+        """correct() returns a non-empty str for each engine."""
+        engines = ("phunspell", "symspellpy", "wanchanberta_thai_grammarly")
+        for engine in engines:
+            corrected = correct("ทดสอง", engine=engine)
+            self.assertIsInstance(corrected, str)
+            self.assertNotEqual(corrected, "")
+
+    def test_spell_sent(self):
+        """spell_sent() returns something for the shared token list."""
+        for engine in ("phunspell", "symspellpy"):
+            self.assertIsNotNone(spell_sent(SENT_TOKS, engine=engine))
+
+    def test_correct_sent(self):
+        """correct_sent() returns something for each engine."""
+        engines = ("phunspell", "symspellpy", "wanchanberta_thai_grammarly")
+        for engine in engines:
+            self.assertIsNotNone(correct_sent(SENT_TOKS, engine=engine))
+        # Call the symspellpy module's own correct_sent() directly as well.
+        self.assertIsNotNone(symspellpy.correct_sent(SENT_TOKS))
diff --git a/tests/testx_summarize.py b/tests/testx_summarize.py
new file mode 100644
index 000000000..9d2b5ad73
--- /dev/null
+++ b/tests/testx_summarize.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+from pythainlp.summarize import extract_keywords, summarize
+
+INPUT_TEXT = (
+    "อาหาร หมายถึง ของแข็งหรือของเหลว "
+    "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว "
+    "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย "
+    "ทำให้ร่างกายเจริญเติบโต "
+    "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย "
+    "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ "
+    "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"
+)
+
+
+class SummarizeTestCaseX(unittest.TestCase):
+    """Tests for summarize(), extract_keywords() and the KeyBERT class."""
+
+    def test_summarize(self):
+        """summarize() with the default, an "mt5-*" and unknown engines."""
+        self.assertEqual(
+            summarize(text=INPUT_TEXT, n=1),
+            ["อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"],
+        )
+        # Disabled checks, kept for reference:
+        # self.assertIsNotNone(summarize(INPUT_TEXT, engine="mt5-small"))
+        # self.assertIsNotNone(summarize([]))
+        # self.assertIsNotNone(summarize(INPUT_TEXT, 1, engine="mt5-small"))
+        self.assertIsNotNone(
+            summarize(INPUT_TEXT, 1, engine="mt5-cpe-kmutt-thai-sentence-sum")
+        )
+        # The engine name "XX" is expected to still return a value.
+        self.assertIsNotNone(summarize(INPUT_TEXT, 1, engine="XX"))
+        # Only the call goes inside assertRaises: an assertion wrapped
+        # around it would never run once the expected ValueError is raised.
+        with self.assertRaises(ValueError):
+            summarize(INPUT_TEXT, 1, engine="mt5-cat")
+
+    def test_keyword_extraction(self):
+        """extract_keywords(): empty input, engines, tokenizer, stop words."""
+        self.assertEqual(extract_keywords(""), [])
+        self.assertEqual(extract_keywords("   "), [])
+
+        # test default engine, common case
+        keywords = extract_keywords(INPUT_TEXT)
+        expected = ["ซ่อมแซมส่วน", "เจริญเติบโต", "อวัยวะต่างๆ", "ควบคุมการเปลี่ยนแปลง"]
+        for exp_kw in expected:
+            self.assertIn(exp_kw, keywords)
+
+        # test another engine
+        for max_kw in (5, 10):
+            keywords = extract_keywords(
+                INPUT_TEXT, engine="frequency", max_keywords=max_kw
+            )
+            self.assertEqual(len(keywords), max_kw)
+
+        # test invalid engine
+        with self.assertRaises(ValueError):
+            extract_keywords(INPUT_TEXT, engine="random engine")
+
+        # test different tokenizer
+        keywords = extract_keywords(INPUT_TEXT, tokenizer="attacut")
+
+        expected = ["อวัยวะต่างๆ", "ซ่อมแซมส่วน", "เจริญเติบโต", "เกิดพลังงาน"]
+        for exp_kw in expected:
+            self.assertIn(exp_kw, keywords)
+
+        # test overriding stop words
+        stpw = "เจริญเติบโต"
+        keywords = extract_keywords(INPUT_TEXT, stop_words=[stpw])
+        self.assertNotIn(stpw, keywords)
+
+    def test_keybert(self):
+        """KeyBERT.extract_keywords(): n-gram range and max_keywords."""
+        from pythainlp.summarize.keybert import KeyBERT
+        from pythainlp.tokenize import word_tokenize
+
+        keybert = KeyBERT()
+        # test ngram range
+        ng_ranges = [(1, 1), (1, 2), (2, 2), (3, 3)]
+        for ng_min, ng_max in ng_ranges:
+            keywords = keybert.extract_keywords(
+                INPUT_TEXT, keyphrase_ngram_range=(ng_min, ng_max)
+            )
+
+            for kw in keywords:
+                n_tokens = len(word_tokenize(kw))
+                self.assertTrue(
+                    ng_min <= n_tokens <= ng_max,
+                    msg=f"{kw!r}: {n_tokens} not in [{ng_min}, {ng_max}]",
+                )
+
+        # test max_keywords
+        max_kws = 10
+        keywords = keybert.extract_keywords(INPUT_TEXT, max_keywords=max_kws)
+        self.assertLessEqual(len(keywords), max_kws)
+
+        text_short = "เฮลโหล"
+        keywords = keybert.extract_keywords(text_short, max_keywords=max_kws)
+        self.assertLessEqual(len(keywords), max_kws)
diff --git a/tests/testx_tokenize.py b/tests/testx_tokenize.py
new file mode 100644
index 000000000..43007cf4e
--- /dev/null
+++ b/tests/testx_tokenize.py
@@ -0,0 +1,274 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+from pythainlp.tokenize import (
+    DEFAULT_WORD_DICT_TRIE,
+    attacut,
+    deepcut,
+    nercut,
+    oskut,
+    paragraph_tokenize,
+    pyicu,
+    sefr_cut,
+    sent_tokenize,
+    ssg,
+    subword_tokenize,
+    tltk,
+    word_tokenize,
+)
+from pythainlp.tokenize import clause_tokenize as sent_clause_tokenize
+
+from .test_tokenize import (
+    SENT_1,
+    SENT_1_TOKS,
+    SENT_2,
+    SENT_2_TOKS,
+    SENT_3,
+    SENT_3_TOKS,
+    SENT_4,
+    TEXT_1,
+)
+
+
+# Tests for functions that need external imports
+class TokenizeTestCaseX(unittest.TestCase):
+    """Tokenizer tests for engines that need external imports."""
+
+    def testx_subword_tokenize(self):
+        """subword_tokenize() across the five engines named below."""
+        self.assertEqual(subword_tokenize(None, engine="ssg"), [])
+        self.assertEqual(subword_tokenize(None, engine="han_solo"), [])
+
+        for engine in ("ssg", "han_solo"):
+            self.assertEqual(
+                subword_tokenize("แมวกินปลา", engine=engine),
+                ["แมว", "กิน", "ปลา"],
+            )
+            self.assertIn(
+                "ดาว", subword_tokenize("สวัสดีดาวอังคาร", engine=engine)
+            )
+            self.assertNotIn(
+                "า", subword_tokenize("สวัสดีดาวอังคาร", engine=engine)
+            )
+
+        for engine in ("tltk", "phayathai", "wangchanberta"):
+            self.assertEqual(subword_tokenize(None, engine=engine), [])
+            self.assertEqual(subword_tokenize("", engine=engine), [])
+            self.assertIsInstance(
+                subword_tokenize("สวัสดิีดาวอังคาร", engine=engine), list
+            )
+            self.assertNotIn(
+                "า", subword_tokenize("สวัสดีดาวอังคาร", engine=engine)
+            )
+            self.assertIsInstance(
+                subword_tokenize("โควิด19", engine=engine), list
+            )
+
+    def testx_sent_tokenize(self):
+        """sent_tokenize() with crfcut, tltk, thaisum, wtp and the default."""
+        samples = (
+            (SENT_1, SENT_1_TOKS),
+            (SENT_2, SENT_2_TOKS),
+            (SENT_3, SENT_3_TOKS),
+        )
+        for sent, toks in samples:
+            self.assertEqual(sent_tokenize(sent, engine="crfcut"), toks)
+        for sent, toks in samples:
+            self.assertEqual(sent_tokenize(sent), toks)
+
+        for engine in ("tltk", "thaisum"):
+            for sent, _toks in samples:
+                self.assertIsNotNone(sent_tokenize(sent, engine=engine))
+
+        # The "wtp-base" and "wtp-large" engines are not exercised here.
+        for engine in ("wtp", "wtp-tiny"):
+            self.assertIsNotNone(sent_tokenize(SENT_3, engine=engine))
+
+        # Expected SENT_4 output: crfcut has a "\n" token, thaisum has none.
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="crfcut"),
+            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
+        )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="thaisum"),
+            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
+        )
+
+    def testx_word_tokenize(self):
+        """word_tokenize() returns something for TEXT_1 with each engine."""
+        engines = (
+            "nlpo3",
+            "attacut",
+            "deepcut",
+            "icu",
+            "nercut",
+            "sefr_cut",
+            "tltk",
+            "oskut",
+        )
+        for engine in engines:
+            self.assertIsNotNone(word_tokenize(TEXT_1, engine=engine))
+
+    def test_numeric_data_format(self):
+        """Numeric tokens (IP, time, decimals, ratios) stay in one piece."""
+        engines = ["attacut", "deepcut", "newmm", "sefr_cut"]
+
+        for engine in engines:
+            self.assertIn(
+                "127.0.0.1",
+                word_tokenize("ไอพีของคุณคือ 127.0.0.1 ครับ", engine=engine),
+            )
+
+            tokens = word_tokenize(
+                "เวลา 12:12pm มีโปรโมชั่น 11.11", engine=engine
+            )
+            self.assertTrue(
+                any(value in tokens for value in ["12:12pm", "12:12"]),
+                msg=f"{engine}: {tokens}",
+            )
+            self.assertIn("11.11", tokens)
+
+            self.assertIn(
+                "1,234,567.89",
+                word_tokenize("รางวัลมูลค่า 1,234,567.89 บาท", engine=engine),
+            )
+
+            tokens = word_tokenize("อัตราส่วน 2.5:1 คือ 5:2", engine=engine)
+            self.assertIn("2.5:1", tokens)
+            self.assertIn("5:2", tokens)
+
+        # try turning off `join_broken_num`
+        engine = "attacut"
+        self.assertNotIn(
+            "127.0.0.1",
+            word_tokenize(
+                "ไอพีของคุณคือ 127.0.0.1 ครับ",
+                engine=engine,
+                join_broken_num=False,
+            ),
+        )
+        self.assertNotIn(
+            "1,234,567.89",
+            word_tokenize(
+                "รางวัลมูลค่า 1,234,567.89 บาท",
+                engine=engine,
+                join_broken_num=False,
+            ),
+        )
+
+    def test_attacut(self):
+        """attacut.segment() and the "attacut" word_tokenize() engine."""
+        sentence = "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"
+        words = ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"]
+        self.assertEqual(attacut.segment(None), [])
+        self.assertEqual(attacut.segment(""), [])
+        self.assertEqual(word_tokenize(sentence, engine="attacut"), words)
+        self.assertEqual(attacut.segment(sentence, model="attacut-sc"), words)
+        self.assertIsNotNone(attacut.segment(sentence, model="attacut-c"))
+
+    def test_deepcut(self):
+        """deepcut.segment() with and without a custom dictionary."""
+        self.assertEqual(deepcut.segment(None), [])
+        self.assertEqual(deepcut.segment(""), [])
+        self.assertIsNotNone(deepcut.segment("ทดสอบ", DEFAULT_WORD_DICT_TRIE))
+        self.assertIsNotNone(deepcut.segment("ทดสอบ", ["ทด", "สอบ"]))
+        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="deepcut"))
+        self.assertIsNotNone(
+            word_tokenize(
+                "ทดสอบ", engine="deepcut", custom_dict=DEFAULT_WORD_DICT_TRIE
+            )
+        )
+
+    def test_icu(self):
+        """pyicu.segment() on empty input and the "icu" engine output."""
+        self.assertEqual(pyicu.segment(None), [])
+        self.assertEqual(pyicu.segment(""), [])
+        self.assertEqual(
+            word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="icu"),
+            ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
+        )
+
+    def test_oskut(self):
+        """oskut.segment() with its default and the "scads" engine."""
+        self.assertEqual(oskut.segment(None), [])
+        self.assertEqual(oskut.segment(""), [])
+        self.assertIsNotNone(
+            oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
+        )
+        self.assertIsNotNone(
+            oskut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="scads"),
+        )
+
+    def test_nercut(self):
+        """nercut.segment() on empty input and a few fixed strings."""
+        self.assertEqual(nercut.segment(None), [])
+        self.assertEqual(nercut.segment(""), [])
+        self.assertIsNotNone(nercut.segment("ทดสอบ"))
+        self.assertEqual(nercut.segment("ทันแน่ๆ"), ["ทัน", "แน่ๆ"])
+        self.assertEqual(nercut.segment("%1ครั้ง"), ["%", "1", "ครั้ง"])
+        self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ["ทุ๊กกโคนน"])
+        self.assertIsNotNone(nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"))
+        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut"))
+
+    def test_sefr_cut(self):
+        """sefr_cut.segment() with its default and the "tnhc" engine."""
+        self.assertEqual(sefr_cut.segment(None), [])
+        self.assertEqual(sefr_cut.segment(""), [])
+        self.assertIsNotNone(
+            sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
+        )
+        self.assertIsNotNone(
+            sefr_cut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tnhc"),
+        )
+
+    def test_ssg(self):
+        """ssg.segment() on empty input and the "ssg" subword engine."""
+        self.assertEqual(ssg.segment(None), [])
+        self.assertEqual(ssg.segment(""), [])
+        self.assertIn(
+            "ดาว", subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
+        )
+
+    def test_tltk(self):
+        """tltk.segment() and tltk.syllable_tokenize()."""
+        self.assertEqual(tltk.segment(None), [])
+        self.assertEqual(tltk.segment(""), [])
+        self.assertEqual(
+            tltk.syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
+            [
+                "ฉัน",
+                "รัก",
+                "ภา",
+                "ษา",
+                "ไทย",
+                "เพราะ",
+                "ฉัน",
+                "เป็น",
+                "คน",
+                "ไทย",
+            ],
+        )
+        self.assertEqual(tltk.syllable_tokenize(None), [])
+        self.assertEqual(tltk.syllable_tokenize(""), [])
+
+    def test_paragraph_tokenize(self):
+        """paragraph_tokenize() works by default, rejects a bad engine."""
+        sent = (
+            "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา"
+            + "จากผลงานวิจัยที่เคยทำมาในอดีต"
+            + " มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด"
+            + " จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้"
+        )
+        self.assertIsNotNone(paragraph_tokenize(sent))
+        with self.assertRaises(ValueError):
+            paragraph_tokenize(
+                sent, engine="ai2+2thai"
+            )  # engine does not exist
+
+    def test_clause_tokenize(self):
+        """clause_tokenize() returns a list for a list of words."""
+        self.assertIsNotNone(sent_clause_tokenize(["ฉัน", "ทดสอบ"]))
+        self.assertIsInstance(sent_clause_tokenize(["ฉัน", "ทดสอบ"]), list)
diff --git a/tests/test_translate.py b/tests/testx_translate.py
similarity index 97%
rename from tests/test_translate.py
rename to tests/testx_translate.py
index e6809a3da..34be0f868 100644
--- a/tests/test_translate.py
+++ b/tests/testx_translate.py
@@ -10,7 +10,7 @@
 )
 
 
-class TestTranslatePackage(unittest.TestCase):
+class TranslateTestCaseX(unittest.TestCase):
     def test_translate(self):
         # remove("scb_1m_th-en_spm")
         self.assertIsNone(download_model_all())
diff --git a/tests/testx_transliterate.py b/tests/testx_transliterate.py
new file mode 100644
index 000000000..778f028b5
--- /dev/null
+++ b/tests/testx_transliterate.py
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+
+import torch
+
+from pythainlp.corpus import remove
+from pythainlp.transliterate import pronunciate, puan, romanize, transliterate
+from pythainlp.transliterate.ipa import trans_list, xsampa_list
+from pythainlp.transliterate.thai2rom import ThaiTransliterator
+from pythainlp.transliterate.thai2rom_onnx import ThaiTransliterator_ONNX
+from pythainlp.transliterate.wunsen import WunsenTransliterate
+
+# Thai input -> expected romanization for both thai2rom engines.
+THAI2ROM_CASES = {
+    "แมว": "maeo",
+    "บ้านไร่": "banrai",
+    "สุนัข": "sunak",
+    "นก": "nok",
+    "ความอิ่ม": "khwam-im",
+    "กานต์ ณรงค์": "kan narong",
+    "สกุนต์": "sakun",
+    "ชารินทร์": "charin",
+}
+
+
+class TransliterateTestCaseX(unittest.TestCase):
+    """Tests for the pythainlp.transliterate functions and engine classes."""
+
+    def test_romanize(self):
+        """romanize() with the "tltk" engine."""
+        self.assertEqual(romanize("แมว", engine="tltk"), "maeo")
+
+    def test_romanize_thai2rom(self):
+        """romanize() with "thai2rom" matches every THAI2ROM_CASES entry."""
+        for thai, roman in THAI2ROM_CASES.items():
+            self.assertEqual(romanize(thai, engine="thai2rom"), roman)
+
+    def test_romanize_thai2rom_onnx(self):
+        """romanize() with "thai2rom_onnx" matches every THAI2ROM_CASES entry."""
+        for thai, roman in THAI2ROM_CASES.items():
+            self.assertEqual(romanize(thai, engine="thai2rom_onnx"), roman)
+
+    def test_romanize_lookup(self):
+        """romanize() with the "lookup" engine and a thai2rom fallback."""
+        self.assertEqual(
+            romanize("ความอิ่ม", engine="lookup", fallback_engine="thai2rom"),
+            "khwam-im",
+        )
+        self.assertEqual(
+            romanize("สามารถ", engine="lookup", fallback_engine="thai2rom"),
+            "samat",
+        )
+
+    def test_thai2rom_prepare_sequence(self):
+        """Check the [<UNK>, <end>] encoding for "A" and "♥" but not "ก"."""
+        transliterater = ThaiTransliterator()
+
+        UNK_TOKEN = 1  # UNK_TOKEN or <UNK> is represented by 1
+        END_TOKEN = 3  # END_TOKEN or <end> is represented by 3
+        # Tensor.tolist() yields plain Python ints, so the expected value
+        # needs no .cpu().detach().numpy() round trip.
+        unknown = torch.tensor(
+            [UNK_TOKEN, END_TOKEN], dtype=torch.long
+        ).tolist()
+
+        def encode(text):
+            return transliterater._prepare_sequence_in(text).tolist()
+
+        self.assertListEqual(encode("A"), unknown)
+        self.assertListEqual(encode("♥"), unknown)
+        self.assertNotEqual(encode("ก"), unknown)
+
+    def test_thai2rom_onnx_prepare_sequence(self):
+        """Same [<UNK>, <end>] check against ThaiTransliterator_ONNX."""
+        transliterater = ThaiTransliterator_ONNX()
+
+        UNK_TOKEN = 1  # UNK_TOKEN or <UNK> is represented by 1
+        END_TOKEN = 3  # END_TOKEN or <end> is represented by 3
+        unknown = torch.tensor(
+            [UNK_TOKEN, END_TOKEN], dtype=torch.long
+        ).tolist()
+
+        def encode(text):
+            return transliterater._prepare_sequence_in(text).tolist()
+
+        self.assertListEqual(encode("A"), unknown)
+        self.assertListEqual(encode("♥"), unknown)
+        self.assertNotEqual(encode("ก"), unknown)
+
+    def test_transliterate(self):
+        """transliterate() with the pyicu, ipa, thaig2p and tltk engines."""
+        self.assertEqual(transliterate("แมว", "pyicu"), "mæw")
+        self.assertEqual(transliterate("คน", engine="ipa"), "kʰon")
+        for engine in ("thaig2p", "thaig2p_v2", "tltk_g2p", "tltk_ipa"):
+            for word in ("คน", "แมว"):
+                self.assertIsNotNone(transliterate(word, engine=engine))
+
+        self.assertIsNotNone(trans_list("คน"))
+        self.assertIsNotNone(xsampa_list("คน"))
+
+    def test_transliterate_wunsen(self):
+        """WunsenTransliterate.transliterate() for jp, ko, vi and zh input."""
+        wt = WunsenTransliterate()
+        self.assertEqual(wt.transliterate("ohayō", lang="jp"), "โอฮาโย")
+        self.assertEqual(
+            wt.transliterate(
+                "ohayou", lang="jp", jp_input="Hepburn-no diacritic"
+            ),
+            "โอฮาโย",
+        )
+        self.assertEqual(
+            wt.transliterate("ohayō", lang="jp", system="RI35"), "โอะฮะโย"
+        )
+        self.assertEqual(
+            wt.transliterate("annyeonghaseyo", lang="ko"), "อันนย็องฮาเซโย"
+        )
+        self.assertEqual(wt.transliterate("xin chào", lang="vi"), "ซีน จ่าว")
+        self.assertEqual(wt.transliterate("ni3 hao3", lang="zh"), "หนี เห่า")
+        self.assertEqual(
+            wt.transliterate("ni3 hao3", lang="zh", zh_sandhi=False),
+            "หนี่ เห่า",
+        )
+        self.assertEqual(
+            wt.transliterate("ni3 hao3", lang="zh", system="RI49"), "หนี ห่าว"
+        )
+        with self.assertRaises(NotImplementedError):
+            wt.transliterate("xin chào", lang="vii")
+
+    def test_pronunciate(self):
+        """pronunciate() on empty input and with the "w2p" engine."""
+        self.assertEqual(pronunciate(""), "")
+        # NOTE(review): presumably drops the cached "thai_w2p" corpus - confirm.
+        remove("thai_w2p")
+        for word in ("คน", "แมว", "มข.", "มช.", "jks"):
+            self.assertIsNotNone(pronunciate(word, engine="w2p"))
+
+    def test_puan(self):
+        """puan() with and without show_pronunciation."""
+        self.assertEqual(puan("แมว"), "แมว")
+        self.assertEqual(puan("นาริน"), "นิน-รา")
+        self.assertEqual(puan("นาริน", show_pronunciation=False), "นินรา")
+        self.assertEqual(
+            puan("การทำความดี", show_pronunciation=False), "ดานทำความกี"
+        )
diff --git a/tests/test_ulmfit.py b/tests/testx_ulmfit.py
similarity index 99%
rename from tests/test_ulmfit.py
rename to tests/testx_ulmfit.py
index 81db7ba30..0459e463f 100644
--- a/tests/test_ulmfit.py
+++ b/tests/testx_ulmfit.py
@@ -40,7 +40,7 @@
 from pythainlp.ulmfit.tokenizer import BaseTokenizer as base_tokenizer
 
 
-class TestUlmfitPackage(unittest.TestCase):
+class UlmfitTestCaseX(unittest.TestCase):
     def test_ThaiTokenizer(self):
         self.thai = ThaiTokenizer()
         self.assertIsNotNone(self.thai.tokenizer("ทดสอบการตัดคำ"))
diff --git a/tests/testx_util.py b/tests/testx_util.py
new file mode 100644
index 000000000..dbaac68d8
--- /dev/null
+++ b/tests/testx_util.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Unit tests for pythainlp.util module.
+"""
+
+import unittest
+
+from pythainlp.util import rhyme, thai_word_tone_detector
+from pythainlp.util.spell_words import spell_word
+
+
+class UtilTestCaseX(unittest.TestCase):
+    """Tests for rhyme(), spell_word() and thai_word_tone_detector()."""
+
+    def testx_rhyme(self):
+        """rhyme() returns a list with more than two entries for "แมว"."""
+        rhymes = rhyme("แมว")
+        self.assertIsInstance(rhymes, list)
+        # assertGreater reports the actual length when the check fails.
+        self.assertGreater(len(rhymes), 2)
+
+    def test_spell_word(self):
+        """spell_word() spells out each syllable, then the whole word."""
+        self.assertEqual(spell_word("เสือ"), ["สอ", "เอือ", "เสือ"])
+        self.assertEqual(spell_word("เสื้อ"), ["สอ", "เอือ", "ไม้โท", "เสื้อ"])
+        self.assertEqual(spell_word("คน"), ["คอ", "นอ", "คน"])
+        self.assertEqual(
+            spell_word("คนดี"), ["คอ", "นอ", "คน", "ดอ", "อี", "ดี", "คนดี"]
+        )
+
+    def testx_thai_word_tone_detector(self):
+        """thai_word_tone_detector() returns (syllable, tone) pairs."""
+        self.assertIsNotNone(thai_word_tone_detector("คนดี"))
+        self.assertEqual(
+            thai_word_tone_detector("ราคา"), [("รา", "m"), ("คา", "m")]
+        )
diff --git a/tests/test_wangchanberta.py b/tests/testx_wangchanberta.py
similarity index 95%
rename from tests/test_wangchanberta.py
rename to tests/testx_wangchanberta.py
index 09dca9822..5dc4b003a 100644
--- a/tests/test_wangchanberta.py
+++ b/tests/testx_wangchanberta.py
@@ -5,7 +5,7 @@
 from pythainlp.wangchanberta import ThaiNameTagger, segment
 
 
-class TestWangchanberta(unittest.TestCase):
+class WangchanbertaTestCaseX(unittest.TestCase):
     def test_thainer_wangchanberta(self):
         ner = ThaiNameTagger()
         self.assertIsNotNone(
diff --git a/tests/test_word_vector.py b/tests/testx_word_vector.py
similarity index 97%
rename from tests/test_word_vector.py
rename to tests/testx_word_vector.py
index 8ca160857..270fb9eb6 100644
--- a/tests/test_word_vector.py
+++ b/tests/testx_word_vector.py
@@ -5,7 +5,7 @@
 from pythainlp.word_vector import WordVector
 
 
-class TestWordVectorPackage(unittest.TestCase):
+class WordVectorTestCaseX(unittest.TestCase):
     def test_thai2vec(self):
         _wv = WordVector("thai2fit_wv")
         self.assertGreaterEqual(
diff --git a/tests/test_wsd.py b/tests/testx_wsd.py
similarity index 93%
rename from tests/test_wsd.py
rename to tests/testx_wsd.py
index 372c04304..6917a4d96 100644
--- a/tests/test_wsd.py
+++ b/tests/testx_wsd.py
@@ -7,7 +7,7 @@
 from pythainlp.wsd import get_sense
 
 
-class TestWsdPackage(unittest.TestCase):
+class WsdTestCaseX(unittest.TestCase):
     def test_get_sense(self):
         self.assertTrue(get_sense("เขากำลังอบขนมคุกกี้", "คุกกี้"))
         self.assertTrue(get_sense("เว็บนี้ต้องการคุกกี้ในการทำงาน", "คุกกี้"))