From ea68ead90dea3abbfc8bc0c5b3414fb5cc4ca6d1 Mon Sep 17 00:00:00 2001 From: Shauren Date: Sat, 17 Feb 2024 12:01:53 +0100 Subject: Dep/SFMT: Support NEON on ARM with MSVC --- dep/SFMT/SFMT-neon.h | 11 +++++++++-- dep/SFMT/sfmt_01_msvc_arm_support.diff | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 dep/SFMT/sfmt_01_msvc_arm_support.diff diff --git a/dep/SFMT/SFMT-neon.h b/dep/SFMT/SFMT-neon.h index 8790b8b60ee..cffb3a514e0 100644 --- a/dep/SFMT/SFMT-neon.h +++ b/dep/SFMT/SFMT-neon.h @@ -10,6 +10,13 @@ #ifndef SFMT_NEON_H #define SFMT_NEON_H +#ifdef _MSC_VER +// The .n128_u64 field is first, so name .n128_u32 explicitly to initialize the four 32-bit lanes directly. +#define sfmt_neon_init_uint32x4_t(w,x,y,z) { .n128_u32 = { (w), (x), (y), (z) } } +#else +#define sfmt_neon_init_uint32x4_t(w,x,y,z) { (w), (x), (y), (z) } +#endif + inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, uint32x4_t c, uint32x4_t d); @@ -27,8 +34,8 @@ inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, uint32x4_t c, uint32x4_t d) { uint32x4_t v, x, y, z; - static const uint32x4_t vzero = {0,0,0,0}; - static const uint32x4_t vmask = {SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4}; + static const uint32x4_t vzero = sfmt_neon_init_uint32x4_t(0, 0, 0, 0); + static const uint32x4_t vmask = sfmt_neon_init_uint32x4_t(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4); #define rotate_bytes(A, B, C) vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(A),vreinterpretq_u8_u32(B),(C))) diff --git a/dep/SFMT/sfmt_01_msvc_arm_support.diff b/dep/SFMT/sfmt_01_msvc_arm_support.diff new file mode 100644 index 00000000000..769b3c5dfc1 --- /dev/null +++ b/dep/SFMT/sfmt_01_msvc_arm_support.diff @@ -0,0 +1,29 @@ +diff --git a/dep/SFMT/SFMT-neon.h b/dep/SFMT/SFMT-neon.h +index 8790b8b60e..cffb3a514e 100644 +--- a/dep/SFMT/SFMT-neon.h ++++ b/dep/SFMT/SFMT-neon.h +@@ -10,6 +10,13 @@ + #ifndef SFMT_NEON_H + #define SFMT_NEON_H + ++#ifdef
_MSC_VER ++// The .n128_u64 field is first, so name .n128_u32 explicitly to initialize the four 32-bit lanes directly. ++#define sfmt_neon_init_uint32x4_t(w,x,y,z) { .n128_u32 = { (w), (x), (y), (z) } } ++#else ++#define sfmt_neon_init_uint32x4_t(w,x,y,z) { (w), (x), (y), (z) } ++#endif ++ + inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, + uint32x4_t c, uint32x4_t d); + +@@ -27,8 +34,8 @@ inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, + uint32x4_t c, uint32x4_t d) + { + uint32x4_t v, x, y, z; +- static const uint32x4_t vzero = {0,0,0,0}; +- static const uint32x4_t vmask = {SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4}; ++ static const uint32x4_t vzero = sfmt_neon_init_uint32x4_t(0, 0, 0, 0); ++ static const uint32x4_t vmask = sfmt_neon_init_uint32x4_t(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4); + + #define rotate_bytes(A, B, C) vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(A),vreinterpretq_u8_u32(B),(C))) + -- cgit v1.2.3