-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Fix ARM builds #1529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Fix ARM builds #1529
Changes from 7 commits
5baa276
0c81f25
5824bac
5433ab2
4eb6bff
8d7d9bf
0d78609
6b6a34f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -170,7 +170,7 @@ struct MulMat { | |
| funcs[n_left-1](n, vx, bx, info, nrc_x); | ||
| } | ||
| } | ||
| static IQK_NOINLINE bool set_mul_mat(int typeA, int typeB,int ne00, MulMat& mm, int Ny); | ||
| static IQK_NOINLINE bool set_mul_mat(int typeA, int ne00, MulMat& mm, int& row_size_q8, int Ny); | ||
| private: | ||
| template <typename Dequantizer> static IQK_NOINLINE void set_functions(MulMat& m); | ||
| }; | ||
|
|
@@ -1020,8 +1020,9 @@ bool iqk_mul_mat(long Nx, long Ny, long ne00, | |
| float * C, long stride_C, int ith, int nth) { | ||
|
|
||
| MulMat mm; | ||
| int row_size_q8; | ||
|
|
||
| if (!MulMat::set_mul_mat(typeA, typeB, ne00, mm, Ny)) { | ||
| if (!MulMat::set_mul_mat(typeA, ne00, mm, row_size_q8, Ny)) { | ||
| return false; | ||
| } | ||
|
|
||
|
|
@@ -4444,13 +4445,14 @@ template <int nrc> struct Q80 { | |
| } | ||
|
|
||
| inline const int8_t * quant_data(int iy, int i) const { | ||
| const block_q8_0_x4 * y4 = (const block_q8_0_x4 *)y[iy] + i; | ||
| const block_q8_0 * y4 = (const block_q8_0 *)y[iy] + i; | ||
| return y4->qs; | ||
| } | ||
|
|
||
| inline float16x4_t load_scales(int iy, int i) const { | ||
| const block_q8_0_x4 * y4 = (const block_q8_0_x4 *)y[iy] + i; | ||
| return vld1_f16((const float16_t *)y4->d); | ||
| const block_q8_0 * y4 = (const block_q8_0 *)y[iy] + i; | ||
| float16_t d_val = GGML_FP16_TO_FP32(y4->d); | ||
| return vdup_n_f16(d_val); | ||
|
Comment on lines
+4453
to
+4455
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

The conversion from FP16 to FP32 and back here looks avoidable — suggested change:

    return vdup_n_f16(y4->d);

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

@johnnynunez Since you have tested on your device, could you please attach a screenshot of it running and your platform info (from nvidia-smi and lscpu, for example)? This is because our next version will drop support for llamafile (for performance considerations).

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

GH200 @KMSorSMS

Architecture: aarch64
CPU op-mode(s): 64-bit
Byte Order: Little Endian
CPU(s): 64
On-line CPU(s) list: 0-63
Vendor ID: ARM
Model name: Neoverse-V2
Model: 0
Thread(s) per core: 1
Core(s) per socket: 64
Socket(s): 1
Stepping: r0p0
BogoMIPS: 2000.00
Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc f
lagm sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bf16 dgh bti
NUMA:
NUMA node(s): 9
NUMA node0 CPU(s): 0-63
NUMA node1 CPU(s):
NUMA node2 CPU(s):
NUMA node3 CPU(s):
NUMA node4 CPU(s):
NUMA node5 CPU(s):
NUMA node6 CPU(s):
NUMA node7 CPU(s):
NUMA node8 CPU(s):
Vulnerabilities:
Gather data sampling: Not affected
Itlb multihit: Not affected
L1tf: Not affected
Mds: Not affected
Meltdown: Not affected
Mmio stale data: Not affected
Reg file data sampling: Not affected
Retbleed: Not affected
Spec rstack overflow: Not affected
Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl
Spectre v1: Mitigation; __user pointer sanitization
Spectre v2: Not affected
Srbds: Not affected
Tsx async abort: Not affected |
||
| } | ||
|
|
||
| template <typename Dequantizer> | ||
|
|
@@ -4485,13 +4487,17 @@ template <int nrc> struct Q81 { | |
| } | ||
|
|
||
| inline const int8_t * quant_data(int iy, int i) const { | ||
| const block_q8_1_x4 * y4 = (const block_q8_1_x4 *)y[iy] + i; | ||
| const block_q8_1 * y4 = (const block_q8_1 *)y[iy] + i; | ||
| return y4->qs; | ||
| } | ||
|
|
||
| inline float16x8_t load_scales(int iy, int i) const { | ||
| const block_q8_1_x4 * y4 = (const block_q8_1_x4 *)y[iy] + i; | ||
| return vld1q_f16((const float16_t *)y4->d); | ||
| const block_q8_1 * y4 = (const block_q8_1 *)y[iy] + i; | ||
| float16_t d_val = GGML_FP16_TO_FP32(y4->d); | ||
| float16_t s_val = GGML_FP16_TO_FP32(y4->s); | ||
| float16x4_t d_vec = vdup_n_f16(d_val); | ||
| float16x4_t s_vec = vdup_n_f16(s_val); | ||
| return vcombine_f16(d_vec, s_vec); | ||
| } | ||
|
|
||
| template <typename Dequantizer> | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change removes support for Windows (
win32) and other non-Linux platforms from the `get_cpu_instruct` method. The previous implementation had specific logic for Windows and a fallback for other platforms. On non-Linux systems, this function will now return `None`, which will likely cause failures later in the build process. While this PR focuses on ARM (which is often Linux-based), breaking existing platform support is a significant regression. Please restore the support for other platforms like Windows to avoid breaking builds for other users.