Skip to content

Commit 775d775

Browse files
authored
Merge branch 'ggml-org:master' into mradermacher
2 parents 69ced25 + 21c17b5 commit 775d775

File tree

7 files changed

+193
-72
lines changed

7 files changed

+193
-72
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1070,7 +1070,8 @@ jobs:
10701070
write-host "Downloading AMD HIP SDK Installer"
10711071
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
10721072
write-host "Installing AMD HIP SDK"
1073-
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1073+
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
1074+
$proc.WaitForExit(600000)
10741075
write-host "Completed AMD HIP SDK installation"
10751076
10761077
- name: Verify ROCm

.github/workflows/release.yml

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -557,7 +557,8 @@ jobs:
557557
write-host "Downloading AMD HIP SDK Installer"
558558
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
559559
write-host "Installing AMD HIP SDK"
560-
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
560+
$proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
561+
$proc.WaitForExit(600000)
561562
write-host "Completed AMD HIP SDK installation"
562563
563564
- name: Verify ROCm

convert_hf_to_gguf.py

Lines changed: 12 additions & 16 deletions
Original file line number · Diff line number · Diff line change
@@ -1344,6 +1344,12 @@ def _find_param(self, obj: dict[str, Any], keys: Iterable[str], optional: bool =
13441344
return None
13451345
raise KeyError(f"could not find any of: {keys}")
13461346

1347+
def tensor_force_quant(self, name, new_name, bid, n_dims):
1348+
del bid, name, n_dims # unused
1349+
if ".patch_embd.weight" in new_name:
1350+
return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
1351+
return False
1352+
13471353

13481354
@ModelBase.register("GPTNeoXForCausalLM")
13491355
class GPTNeoXModel(TextModel):
@@ -2315,10 +2321,9 @@ def set_gguf_parameters(self):
23152321
self.gguf_writer.add_vision_use_gelu(True)
23162322

23172323
def tensor_force_quant(self, name, new_name, bid, n_dims):
2318-
del bid, new_name, n_dims # unused
23192324
if ".embeddings." in name:
23202325
return gguf.GGMLQuantizationType.F32
2321-
return False
2326+
return super().tensor_force_quant(name, new_name, bid, n_dims)
23222327

23232328
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
23242329
del bid # unused
@@ -3306,12 +3311,9 @@ def set_gguf_parameters(self):
33063311
self.gguf_writer.add_vision_attention_layernorm_eps(self.global_config.get("rms_norm_eps", 1e-6))
33073312

33083313
def tensor_force_quant(self, name, new_name, bid, n_dims):
3309-
del bid, name, n_dims # unused
3310-
if ".patch_embd." in new_name:
3311-
return gguf.GGMLQuantizationType.F16
33123314
if ".position_embd." in new_name:
33133315
return gguf.GGMLQuantizationType.F32
3314-
return False
3316+
return super().tensor_force_quant(name, new_name, bid, n_dims)
33153317

33163318
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
33173319
del bid # unused
@@ -3384,10 +3386,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
33843386
yield ("audio_tower.embed_positions.weight", pos_embd)
33853387

33863388
def tensor_force_quant(self, name, new_name, bid, n_dims):
3387-
del bid, new_name, n_dims # unused
33883389
if ".conv" in name and ".weight" in name:
33893390
return gguf.GGMLQuantizationType.F16
3390-
return False
3391+
return super().tensor_force_quant(name, new_name, bid, n_dims)
33913392

33923393
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
33933394
if name.startswith("thinker."):
@@ -3433,12 +3434,9 @@ def set_gguf_parameters(self):
34333434
self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
34343435

34353436
def tensor_force_quant(self, name, new_name, bid, n_dims):
3436-
del bid, name, n_dims # unused
3437-
if ".patch_embd." in new_name:
3438-
return gguf.GGMLQuantizationType.F16
34393437
if ".position_embd." in new_name:
34403438
return gguf.GGMLQuantizationType.F32
3441-
return False
3439+
return super().tensor_force_quant(name, new_name, bid, n_dims)
34423440

34433441
def _mapping_interns1_name(self, name):
34443442
names_map = {
@@ -5072,13 +5070,12 @@ def set_gguf_parameters(self):
50725070
self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)
50735071

50745072
def tensor_force_quant(self, name, new_name, bid, n_dims):
5075-
del bid, new_name, n_dims # unused
50765073
# related to https://github.com/ggml-org/llama.cpp/issues/13025
50775074
if "input_projection" in name:
50785075
return gguf.GGMLQuantizationType.F16
50795076
if ".embeddings." in name:
50805077
return gguf.GGMLQuantizationType.F32
5081-
return False
5078+
return super().tensor_force_quant(name, new_name, bid, n_dims)
50825079

50835080
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
50845081
del bid # unused
@@ -7737,10 +7734,9 @@ def set_gguf_parameters(self):
77377734
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
77387735

77397736
def tensor_force_quant(self, name, new_name, bid, n_dims):
7740-
del bid, new_name, n_dims # unused
77417737
if ".conv" in name and ".weight" in name:
77427738
return gguf.GGMLQuantizationType.F16
7743-
return False
7739+
return super().tensor_force_quant(name, new_name, bid, n_dims)
77447740

77457741
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
77467742
del bid # unused

0 commit comments

Comments (0)