|
25 | 25 | from ..looper.named_module import NamedModule
|
26 | 26 | from ..models import BaseGPTQModel
|
27 | 27 | from ..models.writer import (PROCESS_LOG_FWD_TIME, PROCESS_LOG_LAYER, PROCESS_LOG_MODULE,
|
28 |
| - PROCESS_LOG_NAME, PROCESS_LOG_TIME, QUANT_LOG_DAMP, QUANT_LOG_LOSS) |
| 28 | + PROCESS_LOG_NAME, PROCESS_LOG_TIME, QUANT_LOG_DAMP, QUANT_LOG_LOSS, QUANT_LOG_NSAMPLES) |
29 | 29 | from ..quantization.config import QUANT_METHOD, QuantizeConfig
|
30 | 30 | from ..quantization.gptq import CPU
|
31 | 31 | from ..quantization.qqq import QQQ
|
@@ -121,7 +121,7 @@ def process(self, module: NamedModule):
|
121 | 121 | # logger.info(f"Quantizing module START: {name}, {gptq[name].shape()}")
|
122 | 122 | ## Need to return the quantized_weight for offloading
|
123 | 123 | g = gptq[module.name]
|
124 |
| - wq, scale, zero, g_idx, duration, avg_loss, damp_percent, scale_extra = g.quantize() |
| 124 | + wq, scale, zero, g_idx, duration, avg_loss, damp_percent, scale_extra, nsamples = g.quantize() |
125 | 125 | ## Assign the quantized weight to the weight
|
126 | 126 | #gptq[name].layer.weight.data = q_full_weight.to(device=gptq[name].device)
|
127 | 127 |
|
@@ -151,6 +151,7 @@ def process(self, module: NamedModule):
|
151 | 151 | PROCESS_LOG_LAYER: module.layer_index,
|
152 | 152 | PROCESS_LOG_MODULE: module.name,
|
153 | 153 | QUANT_LOG_LOSS: f"{avg_loss:.5f}",
|
| 154 | + QUANT_LOG_NSAMPLES: f"{nsamples}", |
154 | 155 | QUANT_LOG_DAMP: f"{damp_percent:.5f}",
|
155 | 156 | PROCESS_LOG_TIME: f"{duration:.3f}",
|
156 | 157 | PROCESS_LOG_FWD_TIME: f"{self.fwd_time:.3f}",
|
|
0 commit comments