revert quant format test of auto_round (#1165)

CSY-ModelCloud · web-flow · commit c64b5e66e55e · 2025-01-26T10:36:24.000+08:00
* move auto_round test to a new file

* use the wikitext dataset for calibration

* skip test_quant_time.py and test_packing_speed.py in CI
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -62,8 +62,8 @@ env:
   MAX_JOBS: 8
   RUNNER: 10.0.13.31
   TRANSFORMERS_DIFF_TESTS: "models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
-  TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_ipex_xpu.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py,test_packing_speed.py"
-  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py"
+  TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_ipex_xpu.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py"
+  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py,test_quant_time.py,test_packing_speed.py"
   GPTQMODEL_FORCE_BUILD: 1
   repo: ${{ github.event.inputs.repo || github.repository }}
   ref: ${{ github.event.inputs.ref || github.ref }}
diff --git a/tests/test_quant_formats.py b/tests/test_quant_formats.py
@@ -49,7 +49,6 @@ def setUpClass(self):
 
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_id, use_fast=True)
 
-        # auto-round can't use self.load_dataset() from ModelTest
         traindata = load_dataset("json", data_files="/monster/data/model/dataset/c4-train.00000-of-01024.json.gz", split="train")
         self.calibration_dataset = [self.tokenizer(example["text"]) for example in traindata.select(range(128))]
 
@@ -60,7 +59,6 @@ def setUpClass(self):
             (QUANT_METHOD.GPTQ, BACKEND.IPEX, False, FORMAT.GPTQ, 4),
             (QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ_V2, 4),
             (QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, False, FORMAT.GPTQ, 4),
-            (QUANT_METHOD.AUTO_ROUND, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ, 4),
         ]
     )
     def test_quantize(self, method: QUANT_METHOD, backend: BACKEND, sym: bool, format: FORMAT, bits: int):
diff --git a/tests/test_quant_formats_auto_round.py b/tests/test_quant_formats_auto_round.py
@@ -0,0 +1,137 @@
+# Copyright 2025 ModelCloud
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -- do not touch
+import os
+
+
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# -- end do not touch
+
+import json  # noqa: E402
+import logging  # noqa: E402
+import tempfile  # noqa: E402
+
+from datasets import load_dataset  # noqa: E402
+from parameterized import parameterized  # noqa: E402
+from transformers import AutoTokenizer  # noqa: E402
+
+from gptqmodel import BACKEND, GPTQModel, __version__, get_best_device  # noqa: E402
+from gptqmodel.quantization import FORMAT, QUANT_CONFIG_FILENAME, QUANT_METHOD  # noqa: E402
+from gptqmodel.quantization.config import (  # noqa: E402
+    META_FIELD_QUANTIZER,
+    META_QUANTIZER_GPTQMODEL,
+    AutoRoundQuantizeConfig,
+    QuantizeConfig,
+)
+from gptqmodel.utils.torch import torch_empty_cache  # noqa: E402
+from models.model_test import ModelTest  # noqa: E402
+
+
+class TestQuantization(ModelTest):
+
+    @classmethod
+    def setUpClass(self):
+        self.pretrained_model_id = "/monster/data/model/TinyLlama-1.1B-intermediate-step-1431k-3T"
+
+        self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_id, use_fast=True)
+
+        traindata = load_dataset("wikitext", "wikitext-2-raw-v1", split="train").filter(lambda x: len(x['text']) >= 512)
+        self.calibration_dataset = [self.tokenizer(example["text"]) for example in traindata.select(range(1024))]
+
+
+    @parameterized.expand(
+        [
+            (QUANT_METHOD.AUTO_ROUND, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ, 4),
+        ]
+    )
+    def test_quantize(self, method: QUANT_METHOD, backend: BACKEND, sym: bool, format: FORMAT, bits: int):
+        if method == QUANT_METHOD.GPTQ:
+            quantize_config = QuantizeConfig(
+                bits=bits,
+                group_size=128,
+                desc_act=False if format == FORMAT.MARLIN else True,
+                sym=sym,
+                format=format,
+                damp_percent=0.05
+            )
+        elif method == QUANT_METHOD.AUTO_ROUND:
+            quantize_config = AutoRoundQuantizeConfig(
+                bits=bits,
+                group_size=128,
+                sym=sym,
+                format=format,
+            )
+        else:
+            raise ValueError(f"Invalid quantization method: {method}")
+
+        model = GPTQModel.load(
+            self.pretrained_model_id,
+            quantize_config=quantize_config,
+        )
+        model.quantize(self.calibration_dataset, batch_size=32)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            model.save(tmpdirname)
+
+            logging.info(f"Saved config mem: {model.quantize_config}")
+
+            with open(tmpdirname + "/" + QUANT_CONFIG_FILENAME, "r") as f:
+                file_dict = json.loads(f.read())
+
+                # make sure the json dict saved to file matches config in memory
+                assert model.quantize_config.to_dict() == file_dict
+                logging.info(f"Saved config file: {file_dict}")
+
+            model = GPTQModel.load(
+                tmpdirname,
+                device=get_best_device(backend),
+                backend=backend,
+            )
+
+            logging.info(f"Loaded config: {model.quantize_config}")
+
+            versionable = model.quantize_config.meta_get_versionable(META_FIELD_QUANTIZER)
+            assert META_QUANTIZER_GPTQMODEL in [v[0] for v in versionable]
+            for producer, _version in versionable:
+                if producer == META_QUANTIZER_GPTQMODEL:
+                    assert _version == __version__
+
+            del model
+            torch_empty_cache()
+
+            # skip compat test for (sym=False with v1 GPTQ format) or IPEX format, since we do a meta version safety check
+            if not sym and format == FORMAT.GPTQ or format == FORMAT.IPEX:
+                return
+
+            # test compat: 1) with simple dict type 2) is_marlin_format
+            compat_quantize_config = {
+                "bits": bits,
+                "group_size": 128,
+                "sym": sym,
+                "desc_act": False if format == FORMAT.MARLIN else True,
+                "is_marlin_format": backend == BACKEND.MARLIN,
+            }
+
+            model = GPTQModel.load(
+                tmpdirname,
+                device=get_best_device(backend),
+                quantize_config=compat_quantize_config,
+            )
+            assert isinstance(model.quantize_config, QuantizeConfig)
+
+            del model
+            torch_empty_cache()
+

Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,6 @@ def setUpClass(self):`
`49`	`49`
`50`	`50`	`self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_id, use_fast=True)`
`51`	`51`
`52`		`- # auto-round can't use self.load_dataset() from ModelTest`
`53`	`52`	`traindata = load_dataset("json", data_files="/monster/data/model/dataset/c4-train.00000-of-01024.json.gz", split="train")`
`54`	`53`	`self.calibration_dataset = [self.tokenizer(example["text"]) for example in traindata.select(range(128))]`
`55`	`54`
`@@ -60,7 +59,6 @@ def setUpClass(self):`
`60`	`59`	`(QUANT_METHOD.GPTQ, BACKEND.IPEX, False, FORMAT.GPTQ, 4),`
`61`	`60`	`(QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ_V2, 4),`
`62`	`61`	`(QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, False, FORMAT.GPTQ, 4),`
`63`		`- (QUANT_METHOD.AUTO_ROUND, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ, 4),`
`64`	`62`	`]`
`65`	`63`	`)`
`66`	`64`	`def test_quantize(self, method: QUANT_METHOD, backend: BACKEND, sym: bool, format: FORMAT, bits: int):`