
Commit c64b5e6

revert quant format test of auto_round (#1165)
* move auto_round test to a new file
* use wiki text
* ignore CI for test_quant_time.py, test_packing_speed.py
1 parent 7aa59a0 commit c64b5e6

3 files changed: 139 additions, 4 deletions

.github/workflows/unit_tests.yml

Lines changed: 2 additions & 2 deletions
@@ -62,8 +62,8 @@ env:
   MAX_JOBS: 8
   RUNNER: 10.0.13.31
   TRANSFORMERS_DIFF_TESTS: "models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
-  TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_ipex_xpu.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py,test_packing_speed.py"
-  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py"
+  TORCH_2_5_TESTS: "test_evalplus.py,test_perplexity.py,test_q4_ipex.py,test_ipex_xpu.py,test_save_loaded_quantized_model.py,test_quant_formats.py,models/test_hymba.py"
+  IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py,test_quant_time.py,test_packing_speed.py"
   GPTQMODEL_FORCE_BUILD: 1
   repo: ${{ github.event.inputs.repo || github.repository }}
   ref: ${{ github.event.inputs.ref || github.ref }}
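With this change, test_packing_speed.py is dropped from the Torch 2.5 group and both test_quant_time.py and test_packing_speed.py are added to the ignore list, so CI skips them. A minimal sketch, assuming the workflow consumes these comma-separated variables roughly like the Python below (the real selection logic is not part of this diff):

import os

def parse_file_list(name: str) -> set[str]:
    # Split a comma-separated env var such as IGNORED_TEST_FILES into a set of file names.
    return {f.strip() for f in os.environ.get(name, "").split(",") if f.strip()}

ignored = parse_file_list("IGNORED_TEST_FILES")

# Example: after this commit, both newly ignored files land in the ignored set.
discovered = ["test_quant_formats.py", "test_quant_time.py", "test_packing_speed.py"]
to_run = [t for t in discovered if t not in ignored]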

tests/test_quant_formats.py

Lines changed: 0 additions & 2 deletions
@@ -49,7 +49,6 @@ def setUpClass(self):
 
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_id, use_fast=True)
 
-        # auto-round can't use self.load_dataset() from ModelTest
         traindata = load_dataset("json", data_files="/monster/data/model/dataset/c4-train.00000-of-01024.json.gz", split="train")
         self.calibration_dataset = [self.tokenizer(example["text"]) for example in traindata.select(range(128))]
 
@@ -60,7 +59,6 @@ def setUpClass(self):
             (QUANT_METHOD.GPTQ, BACKEND.IPEX, False, FORMAT.GPTQ, 4),
             (QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ_V2, 4),
             (QUANT_METHOD.GPTQ, BACKEND.EXLLAMA_V2, False, FORMAT.GPTQ, 4),
-            (QUANT_METHOD.AUTO_ROUND, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ, 4),
         ]
     )
     def test_quantize(self, method: QUANT_METHOD, backend: BACKEND, sym: bool, format: FORMAT, bits: int):
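The auto_round case is removed here and moved to the new file below, leaving only GPTQ cases in this parameterized list. As a reminder of the mechanism (a generic sketch, not this project's classes), parameterized.expand turns each tuple into its own generated test method whose items become the positional arguments:

import unittest
from parameterized import parameterized

class ExpandSketch(unittest.TestCase):
    # Each tuple yields one generated test method; its items map onto (method, sym, bits),
    # mirroring how test_quantize receives (method, backend, sym, format, bits).
    @parameterized.expand([
        ("gptq", True, 4),
        ("gptq", False, 4),
    ])
    def test_case(self, method, sym, bits):
        self.assertEqual(method, "gptq")
        self.assertEqual(bits, 4)

if __name__ == "__main__":
    unittest.main()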
Lines changed: 137 additions & 0 deletions (new file)
@@ -0,0 +1,137 @@
+# Copyright 2025 ModelCloud
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -- do not touch
+import os
+
+
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+# -- end do not touch
+
+import json  # noqa: E402
+import logging  # noqa: E402
+import tempfile  # noqa: E402
+
+from datasets import load_dataset  # noqa: E402
+from parameterized import parameterized  # noqa: E402
+from transformers import AutoTokenizer  # noqa: E402
+
+from gptqmodel import BACKEND, GPTQModel, __version__, get_best_device  # noqa: E402
+from gptqmodel.quantization import FORMAT, QUANT_CONFIG_FILENAME, QUANT_METHOD  # noqa: E402
+from gptqmodel.quantization.config import (  # noqa: E402
+    META_FIELD_QUANTIZER,
+    META_QUANTIZER_GPTQMODEL,
+    AutoRoundQuantizeConfig,
+    QuantizeConfig,
+)
+from gptqmodel.utils.torch import torch_empty_cache  # noqa: E402
+from models.model_test import ModelTest  # noqa: E402
+
+
+class TestQuantization(ModelTest):
+
+    @classmethod
+    def setUpClass(self):
+        self.pretrained_model_id = "/monster/data/model/TinyLlama-1.1B-intermediate-step-1431k-3T"
+
+        self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model_id, use_fast=True)
+
+        traindata = load_dataset("wikitext", "wikitext-2-raw-v1", split="train").filter(lambda x: len(x['text']) >= 512)
+        self.calibration_dataset = [self.tokenizer(example["text"]) for example in traindata.select(range(1024))]
+
+
+    @parameterized.expand(
+        [
+            (QUANT_METHOD.AUTO_ROUND, BACKEND.EXLLAMA_V2, True, FORMAT.GPTQ, 4),
+        ]
+    )
+    def test_quantize(self, method: QUANT_METHOD, backend: BACKEND, sym: bool, format: FORMAT, bits: int):
+        if method == QUANT_METHOD.GPTQ:
+            quantize_config = QuantizeConfig(
+                bits=bits,
+                group_size=128,
+                desc_act=False if format == FORMAT.MARLIN else True,
+                sym=sym,
+                format=format,
+                damp_percent=0.05
+            )
+        elif method == QUANT_METHOD.AUTO_ROUND:
+            quantize_config = AutoRoundQuantizeConfig(
+                bits=bits,
+                group_size=128,
+                sym=sym,
+                format=format,
+            )
+        else:
+            raise ValueError(f"Invalid quantization method: {method}")
+
+        model = GPTQModel.load(
+            self.pretrained_model_id,
+            quantize_config=quantize_config,
+        )
+        model.quantize(self.calibration_dataset, batch_size=32)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            model.save(tmpdirname)
+
+            logging.info(f"Saved config mem: {model.quantize_config}")
+
+            with open(tmpdirname + "/" + QUANT_CONFIG_FILENAME, "r") as f:
+                file_dict = json.loads(f.read())
+
+                # make sure the json dict saved to file matches config in memory
+                assert model.quantize_config.to_dict() == file_dict
+                logging.info(f"Saved config file: {file_dict}")
+
+            model = GPTQModel.load(
+                tmpdirname,
+                device=get_best_device(backend),
+                backend=backend,
+            )
+
+            logging.info(f"Loaded config: {model.quantize_config}")
+
+            versionable = model.quantize_config.meta_get_versionable(META_FIELD_QUANTIZER)
+            assert META_QUANTIZER_GPTQMODEL in [v[0] for v in versionable]
+            for producer, _version in versionable:
+                if producer == META_QUANTIZER_GPTQMODEL:
+                    assert _version == __version__
+
+            del model
+            torch_empty_cache()
+
+            # skip compat test with sym=False and v1 since we do meta version safety check
+            if not sym and format == FORMAT.GPTQ or format == FORMAT.IPEX:
+                return
+
+            # test compat: 1) with simple dict type 2) is_marlin_format
+            compat_quantize_config = {
+                "bits": bits,
+                "group_size": 128,
+                "sym": sym,
+                "desc_act": False if format == FORMAT.MARLIN else True,
+                "is_marlin_format": backend == BACKEND.MARLIN,
+            }
+
+            model = GPTQModel.load(
+                tmpdirname,
+                device=get_best_device(backend),
+                quantize_config=compat_quantize_config,
+            )
+            assert isinstance(model.quantize_config, QuantizeConfig)
+
+            del model
+            torch_empty_cache()
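Relative to the old test, the functional change in calibration data (the commit's "use wiki text" item) is that the new file draws from wikitext-2-raw-v1, keeps only rows of at least 512 characters, and tokenizes the first 1024 of them. A standalone sketch of that preparation, reusing the same local model path hard-coded in the test above:

from datasets import load_dataset
from transformers import AutoTokenizer

# Same recipe as setUpClass above; the model path is the local one used in the test.
tokenizer = AutoTokenizer.from_pretrained(
    "/monster/data/model/TinyLlama-1.1B-intermediate-step-1431k-3T", use_fast=True
)

# The filter keeps only rows long enough to contribute meaningful calibration tokens.
traindata = load_dataset("wikitext", "wikitext-2-raw-v1", split="train").filter(
    lambda x: len(x["text"]) >= 512
)
calibration_dataset = [tokenizer(example["text"]) for example in traindata.select(range(1024))]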
