Skip to content

Commit 663b06b

Browse files
committed
Enable compiling arm/neon with MSVC for windows on arm64
1 parent 07453a2 commit 663b06b

File tree

6 files changed

+417
-282
lines changed

6 files changed

+417
-282
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 246 additions & 252 deletions
Large diffs are not rendered by default.

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "../types/xsimd_neon64_register.hpp"
2020
#include "../types/xsimd_utils.hpp"
21+
#include "xsimd_neon_dispatcher.hpp"
2122

2223
namespace xsimd
2324
{
@@ -779,8 +780,8 @@ namespace xsimd
779780

780781
// WRAP_CAST(SUFFIX, TYPE) defines, inside namespace `wrap`, a pair of inline functions that
// forward to the vreinterpretq_f64_<SUFFIX> and vreinterpretq_<SUFFIX>_f64 intrinsics.
// In this hunk the lines following an `NNN-` marker are the removed definitions and the lines
// following an `NNN+` marker are their replacements: the wrappers gain a leading underscore
// and call the intrinsic unqualified instead of through `::`.
// NOTE(review): presumably required because MSVC exposes these intrinsics as macros, so a
// wrapper with the same name (or a `::`-qualified call) would not work there; confirm.
#define WRAP_CAST(SUFFIX, TYPE) \
781782
namespace wrap { \
782-
inline float64x2_t vreinterpretq_f64_##SUFFIX(TYPE a) { return ::vreinterpretq_f64_##SUFFIX(a); } \
783-
inline TYPE vreinterpretq_##SUFFIX##_f64(float64x2_t a) { return ::vreinterpretq_##SUFFIX##_f64(a); } \
783+
inline float64x2_t _vreinterpretq_f64_##SUFFIX(TYPE a) { return vreinterpretq_f64_##SUFFIX(a); } \
784+
inline TYPE _vreinterpretq_##SUFFIX##_f64(float64x2_t a) { return vreinterpretq_##SUFFIX##_f64(a); } \
784785
}
785786

786787
WRAP_CAST(u8, uint8x16_t)
@@ -798,19 +799,13 @@ namespace xsimd
798799
// bitwise_cast from batch<T, A> to batch<double, A> for the neon64 architecture.
// Removed lines (after `NNN-` markers): a detail::bitwise_caster_impl object holding a tuple of
// wrap::vreinterpretq_f64_* functions was built and applied to the source register.
// Added lines (after `NNN+` markers): NEON_DISPATCHER_UNARY selects the
// wrap::_vreinterpretq_f64_* wrapper for T, calls it on register_type(arg) and stores the
// value in `result`, which is then returned.
// NOTE(review): `result` is declared with the *source* register type (batch<T, A>) although
// every wrapper returns float64x2_t; this presumably only works where all NEON vector types
// are the same underlying type (as on MSVC). Confirm for other compilers.
template <class A, class T>
799800
batch<double, A> bitwise_cast(batch<T, A> const& arg, batch<double, A> const&, requires_arch<neon64>)
800801
{
801-
using caster_type = detail::bitwise_caster_impl<float64x2_t,
802-
uint8x16_t, int8x16_t,
803-
uint16x8_t, int16x8_t,
804-
uint32x4_t, int32x4_t,
805-
uint64x2_t, int64x2_t,
806-
float32x4_t>;
807-
const caster_type caster = {
808-
std::make_tuple(wrap::vreinterpretq_f64_u8, wrap::vreinterpretq_f64_s8, wrap::vreinterpretq_f64_u16, wrap::vreinterpretq_f64_s16,
809-
wrap::vreinterpretq_f64_u32, wrap::vreinterpretq_f64_s32, wrap::vreinterpretq_f64_u64, wrap::vreinterpretq_f64_s64,
810-
wrap::vreinterpretq_f64_f32)
811-
};
812802
using register_type = typename batch<T, A>::register_type;
813-
return caster.apply(register_type(arg));
803+
register_type result;
804+
NEON_DISPATCHER_UNARY(wrap::_vreinterpretq_f64_u8, wrap::_vreinterpretq_f64_s8, wrap::_vreinterpretq_f64_u16,
805+
wrap::_vreinterpretq_f64_s16, wrap::_vreinterpretq_f64_u32, wrap::_vreinterpretq_f64_s32,
806+
wrap::_vreinterpretq_f64_u64, wrap::_vreinterpretq_f64_s64, wrap::_vreinterpretq_f64_f32,
807+
T, register_type(arg), result);
808+
return result;
814809
}
815810

816811
namespace detail
@@ -834,20 +829,13 @@ namespace xsimd
834829
// bitwise_cast from batch<double, A> to batch<R, A> for the neon64 architecture.
// Removed lines (after `NNN-` markers): a detail::bitwise_caster_neon64 object holding a tuple
// of wrap::vreinterpretq_*_f64 functions was built and applied with the destination register
// type as explicit template argument.
// Added lines (after `NNN+` markers): NEON_DISPATCHER_UNARY selects the
// wrap::_vreinterpretq_*_f64 wrapper for R, calls it on src_register_type(arg) and stores the
// value in `result`, which is converted to dst_register_type on return.
// NOTE(review): `result` is declared as src_register_type (the double register) although the
// wrappers return the destination vector type; this presumably relies on all NEON vector
// types being the same underlying type (as on MSVC). Confirm for other compilers.
template <class A, class R>
835830
batch<R, A> bitwise_cast(batch<double, A> const& arg, batch<R, A> const&, requires_arch<neon64>)
836831
{
837-
using caster_type = detail::bitwise_caster_neon64<float64x2_t,
838-
uint8x16_t, int8x16_t,
839-
uint16x8_t, int16x8_t,
840-
uint32x4_t, int32x4_t,
841-
uint64x2_t, int64x2_t,
842-
float32x4_t>;
843-
const caster_type caster = {
844-
std::make_tuple(wrap::vreinterpretq_u8_f64, wrap::vreinterpretq_s8_f64, wrap::vreinterpretq_u16_f64, wrap::vreinterpretq_s16_f64,
845-
wrap::vreinterpretq_u32_f64, wrap::vreinterpretq_s32_f64, wrap::vreinterpretq_u64_f64, wrap::vreinterpretq_s64_f64,
846-
wrap::vreinterpretq_f32_f64)
847-
};
848832
using src_register_type = typename batch<double, A>::register_type;
849833
using dst_register_type = typename batch<R, A>::register_type;
850-
return caster.apply<dst_register_type>(src_register_type(arg));
834+
src_register_type result;
835+
NEON_DISPATCHER_UNARY(wrap::_vreinterpretq_u8_f64, wrap::_vreinterpretq_s8_f64, wrap::_vreinterpretq_u16_f64, wrap::_vreinterpretq_s16_f64,
836+
wrap::_vreinterpretq_u32_f64, wrap::_vreinterpretq_s32_f64, wrap::_vreinterpretq_u64_f64, wrap::_vreinterpretq_s64_f64,
837+
wrap::_vreinterpretq_f32_f64, R, src_register_type(arg), result);
838+
return dst_register_type(result);
851839
}
852840

853841
template <class A>
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
// A few macros that select the NEON intrinsic function matching a scalar type
13+
// NEON_DISPATCHER_BINARY: statement macro that executes
//     result = FN(arg1, arg2);
// where FN is the one of U8 ... F32 that matches the scalar `type`.
//
// Integer types are matched by width and signedness rather than by identity with the
// fixed-width typedefs, so that types such as `char`, `long` or `long long` (which may be
// distinct from every intN_t/uintN_t typedef) are dispatched instead of hitting the assertion.
// Floating point is matched against float32_t only; any other type reaches
// assert(false && "unsupported type").
//
// The selection is an ordinary `if` chain, so every branch must compile for every `type`.
// Requires <cassert> and <type_traits>, and float32_t to be declared, at the point of use.
#define NEON_DISPATCHER_BINARY(U8, S8, U16, S16, U32, S32, U64, S64, F32, type, arg1, arg2, result) \
    if (std::is_unsigned<type>::value && sizeof(type) == 1) { \
        result = U8(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 1) { \
        result = S8(arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 2) { \
        result = U16(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 2) { \
        result = S16(arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 4) { \
        result = U32(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 4) { \
        result = S32(arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 8) { \
        result = U64(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 8) { \
        result = S64(arg1, arg2); \
    } else if (std::is_same<type, float32_t>::value) { \
        result = F32(arg1, arg2); \
    } else { \
        assert(false && "unsupported type"); \
    }
35+
36+
// NEON_DISPATCHER_BINARY_EXCLUDE_64: statement macro that executes
//     result = FN(arg1, arg2);
// where FN is the one of U8 ... S32, F32 that matches the scalar `type`.
// There is no branch for 64-bit integers: they reach assert(false && "unsupported type"),
// as does any other unmatched type.
//
// Integer types are matched by width and signedness rather than by identity with the
// fixed-width typedefs, so that types such as `char` or `long` (which may be distinct from
// every intN_t/uintN_t typedef) are dispatched too. Floating point is matched against
// float32_t only.
//
// The selection is an ordinary `if` chain, so every branch must compile for every `type`.
// Requires <cassert> and <type_traits>, and float32_t to be declared, at the point of use.
#define NEON_DISPATCHER_BINARY_EXCLUDE_64(U8, S8, U16, S16, U32, S32, F32, type, arg1, arg2, result) \
    if (std::is_unsigned<type>::value && sizeof(type) == 1) { \
        result = U8(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 1) { \
        result = S8(arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 2) { \
        result = U16(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 2) { \
        result = S16(arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 4) { \
        result = U32(arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 4) { \
        result = S32(arg1, arg2); \
    } else if (std::is_same<type, float32_t>::value) { \
        result = F32(arg1, arg2); \
    } else { \
        assert(false && "unsupported type"); \
    }
54+
55+
// NEON_DISPATCHER_UNARY: statement macro that executes
//     result = FN(arg);
// where FN is the one of U8 ... F32 that matches the scalar `type`.
//
// Integer types are matched by width and signedness rather than by identity with the
// fixed-width typedefs, so that types such as `char`, `long` or `long long` (which may be
// distinct from every intN_t/uintN_t typedef) are dispatched instead of hitting the assertion.
// Floating point is matched against float32_t only; any other type reaches
// assert(false && "unsupported type").
//
// The selection is an ordinary `if` chain, so every branch must compile for every `type`.
// Requires <cassert> and <type_traits>, and float32_t to be declared, at the point of use.
#define NEON_DISPATCHER_UNARY(U8, S8, U16, S16, U32, S32, U64, S64, F32, type, arg, result) \
    if (std::is_unsigned<type>::value && sizeof(type) == 1) { \
        result = U8(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 1) { \
        result = S8(arg); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 2) { \
        result = U16(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 2) { \
        result = S16(arg); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 4) { \
        result = U32(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 4) { \
        result = S32(arg); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 8) { \
        result = U64(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 8) { \
        result = S64(arg); \
    } else if (std::is_same<type, float32_t>::value) { \
        result = F32(arg); \
    } else { \
        assert(false && "unsupported type"); \
    }
77+
78+
// NEON_DISPATCHER_UNARY_EXCLUDE_64: statement macro that executes
//     result = FN(arg);
// where FN is the one of U8 ... S32, F32 that matches the scalar `type`.
// There is no branch for 64-bit integers: they reach assert(false && "unsupported type"),
// as does any other unmatched type.
//
// Integer types are matched by width and signedness rather than by identity with the
// fixed-width typedefs, so that types such as `char` or `long` (which may be distinct from
// every intN_t/uintN_t typedef) are dispatched too. Floating point is matched against
// float32_t only.
//
// The selection is an ordinary `if` chain, so every branch must compile for every `type`.
// Requires <cassert> and <type_traits>, and float32_t to be declared, at the point of use.
#define NEON_DISPATCHER_UNARY_EXCLUDE_64(U8, S8, U16, S16, U32, S32, F32, type, arg, result) \
    if (std::is_unsigned<type>::value && sizeof(type) == 1) { \
        result = U8(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 1) { \
        result = S8(arg); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 2) { \
        result = U16(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 2) { \
        result = S16(arg); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 4) { \
        result = U32(arg); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 4) { \
        result = S32(arg); \
    } else if (std::is_same<type, float32_t>::value) { \
        result = F32(arg); \
    } else { \
        assert(false && "unsupported type"); \
    }
96+
97+
// NEON_DISPATCHER_SELECT: statement macro that executes
//     result = FN(cond, arg1, arg2);
// where FN is the one of U8 ... F32 that matches the scalar `type`.
//
// Integer types are matched by width and signedness rather than by identity with the
// fixed-width typedefs, so that types such as `char`, `long` or `long long` (which may be
// distinct from every intN_t/uintN_t typedef) are dispatched instead of hitting the assertion.
// Floating point is matched against float32_t only; any other type reaches
// assert(false && "unsupported type").
//
// The selection is an ordinary `if` chain, so every branch must compile for every `type`.
// Requires <cassert> and <type_traits>, and float32_t to be declared, at the point of use.
#define NEON_DISPATCHER_SELECT(U8, S8, U16, S16, U32, S32, U64, S64, F32, type, cond, arg1, arg2, result) \
    if (std::is_unsigned<type>::value && sizeof(type) == 1) { \
        result = U8(cond, arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 1) { \
        result = S8(cond, arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 2) { \
        result = U16(cond, arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 2) { \
        result = S16(cond, arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 4) { \
        result = U32(cond, arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 4) { \
        result = S32(cond, arg1, arg2); \
    } else if (std::is_unsigned<type>::value && sizeof(type) == 8) { \
        result = U64(cond, arg1, arg2); \
    } else if (std::is_integral<type>::value && std::is_signed<type>::value && sizeof(type) == 8) { \
        result = S64(cond, arg1, arg2); \
    } else if (std::is_same<type, float32_t>::value) { \
        result = F32(cond, arg1, arg2); \
    } else { \
        assert(false && "unsupported type"); \
    }
119+

include/xsimd/config/xsimd_config.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,9 @@
213213
#else
214214
#define XSIMD_WITH_NEON64 0
215215
#endif
216+
#elif defined(_MSC_VER) && defined(_M_ARM64)
217+
#define XSIMD_WITH_NEON 1
218+
#define XSIMD_WITH_NEON64 1
216219
#else
217220
#define XSIMD_WITH_NEON 0
218221
#define XSIMD_WITH_NEON64 0

include/xsimd/types/xsimd_batch.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ namespace xsimd
183183
private:
184184

185185
template<size_t... Is>
186-
batch(T const* data, detail::index_sequence<Is...>);
186+
batch(T const* data, xsimd::detail::index_sequence<Is...>);
187187

188188
batch logical_and(batch const& other) const;
189189
batch logical_or(batch const& other) const;
@@ -242,13 +242,13 @@ namespace xsimd
242242
private:
243243

244244
template<size_t... Is>
245-
batch_bool(bool const* data, detail::index_sequence<Is...>);
245+
batch_bool(bool const* data, xsimd::detail::index_sequence<Is...>);
246246

247247
template <class U, class... V, size_t I, size_t... Is>
248-
static register_type make_register(detail::index_sequence<I, Is...>, U u, V... v);
248+
static register_type make_register(xsimd::detail::index_sequence<I, Is...>, U u, V... v);
249249

250250
template <class... V>
251-
static register_type make_register(detail::index_sequence<>, V... v);
251+
static register_type make_register(xsimd::detail::index_sequence<>, V... v);
252252
};
253253

254254
template <class T, class A>

include/xsimd/types/xsimd_neon_register.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,37 @@ namespace xsimd
150150
: detail::neon_bool_simd_register<T, neon>
151151
{
152152
};
153+
154+
// A few macros and a helper function to support MSVC
155+
// MSVC (not clang) branch: INITIALIZER_LIST_TO_NEON_VECTOR(T, args) expands to a call of
// neon_vector_initializer_constructor<T>(args), i.e. the macro's T is passed as the helper's
// first template argument (the vector type to build).
#if defined(_MSC_VER) && !defined(__clang__)
156+
#define INITIALIZER_LIST_TO_NEON_VECTOR(T, args) (neon_vector_initializer_constructor<T>(args))
157+
// Converts an initializer list into a NEON vector of type S.
// MSVC does not provide an initializer-list constructor for its NEON vector types, so the
// elements are copied into one of the lane arrays of S instead.
//
// The lane array is chosen from the element category of T and the number of elements:
//   floating point : 4 -> n128_f32, 2 -> n128_f64
//   signed integer : 16 -> n128_i8, 8 -> n128_i16, 4 -> n128_i32, 2 -> n128_i64
//   unsigned       : 16 -> n128_u8, 8 -> n128_u16, 4 -> n128_u32, 2 -> n128_u64
// Floating-point elements are handled first because std::is_signed is also true for them;
// routing them to the integer lanes would convert the values instead of storing them.
// Any other element count leaves the vector value-initialised.
//
// NOTE(review): assumes S exposes n128_f32 and n128_f64 lane arrays next to the integer
// ones, as MSVC's ARM64 __n128 is expected to; confirm against arm64_neon.h.
template <class S, class T>
S neon_vector_initializer_constructor(std::initializer_list<T> data)
{
    // Value-initialise so that an unexpected element count never returns indeterminate lanes.
    S target = S();
    if (std::is_floating_point<T>::value) {
        switch (data.size()) {
        case 4: std::copy(data.begin(), data.end(), target.n128_f32); break;
        case 2: std::copy(data.begin(), data.end(), target.n128_f64); break;
        default: break;
        }
    } else if (std::is_signed<T>::value) {
        switch (data.size()) {
        case 16: std::copy(data.begin(), data.end(), target.n128_i8); break;
        case 8: std::copy(data.begin(), data.end(), target.n128_i16); break;
        case 4: std::copy(data.begin(), data.end(), target.n128_i32); break;
        case 2: std::copy(data.begin(), data.end(), target.n128_i64); break;
        default: break;
        }
    } else {
        switch (data.size()) {
        case 16: std::copy(data.begin(), data.end(), target.n128_u8); break;
        case 8: std::copy(data.begin(), data.end(), target.n128_u16); break;
        case 4: std::copy(data.begin(), data.end(), target.n128_u32); break;
        case 2: std::copy(data.begin(), data.end(), target.n128_u64); break;
        default: break;
        }
    }
    return target;
}
179+
// MSVC branch: REINTERPRET_CAST(T, R) ignores T and expands to (R), i.e. no cast is emitted.
#define REINTERPRET_CAST(T, R) (R)
180+
#else
181+
// Other compilers: INITIALIZER_LIST_TO_NEON_VECTOR(T, args) expands to `(T args)` and
// REINTERPRET_CAST(T, R) to reinterpret_cast<T>(R).
#define INITIALIZER_LIST_TO_NEON_VECTOR(T, args) (T args)
182+
#define REINTERPRET_CAST(T, R) reinterpret_cast<T>(R)
183+
#endif
153184

154185
}
155186
#endif

0 commit comments

Comments
 (0)