diff options
author | Shauren <shauren.trinity@gmail.com> | 2024-02-17 12:01:53 +0100 |
---|---|---|
committer | Shauren <shauren.trinity@gmail.com> | 2024-02-17 12:01:53 +0100 |
commit | ea68ead90dea3abbfc8bc0c5b3414fb5cc4ca6d1 (patch) | |
tree | 2160fe2275d03f061d17f5924b285f0b70d04bac /dep | |
parent | ddba74567c9f6e9cf4596ea9b052d254a0ee50f6 (diff) |
Dep/SFMT: Support NEON on ARM with MSVC
Diffstat (limited to 'dep')
-rw-r--r-- | dep/SFMT/SFMT-neon.h | 11 | ||||
-rw-r--r-- | dep/SFMT/sfmt_01_msvc_arm_support.diff | 29 |
2 files changed, 38 insertions, 2 deletions
diff --git a/dep/SFMT/SFMT-neon.h b/dep/SFMT/SFMT-neon.h index 8790b8b60ee..cffb3a514e0 100644 --- a/dep/SFMT/SFMT-neon.h +++ b/dep/SFMT/SFMT-neon.h @@ -10,6 +10,13 @@ #ifndef SFMT_NEON_H #define SFMT_NEON_H +#ifdef _MSC_VER +// The .n128_u64 field is first. Combine pairs of 32-bit integers in little-endian order. +#define sfmt_neon_init_uint32x4_t(w,x,y,z) { .n128_u32 = { (w), (x), (y), (z) } } +#else +#define sfmt_neon_init_uint32x4_t(w,x,y,z) { (w), (x), (y), (z) } +#endif + inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, uint32x4_t c, uint32x4_t d); @@ -27,8 +34,8 @@ inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, uint32x4_t c, uint32x4_t d) { uint32x4_t v, x, y, z; - static const uint32x4_t vzero = {0,0,0,0}; - static const uint32x4_t vmask = {SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4}; + static const uint32x4_t vzero = sfmt_neon_init_uint32x4_t(0, 0, 0, 0); + static const uint32x4_t vmask = sfmt_neon_init_uint32x4_t(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4); #define rotate_bytes(A, B, C) vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(A),vreinterpretq_u8_u32(B),(C))) diff --git a/dep/SFMT/sfmt_01_msvc_arm_support.diff b/dep/SFMT/sfmt_01_msvc_arm_support.diff new file mode 100644 index 00000000000..769b3c5dfc1 --- /dev/null +++ b/dep/SFMT/sfmt_01_msvc_arm_support.diff @@ -0,0 +1,29 @@ +diff --git a/dep/SFMT/SFMT-neon.h b/dep/SFMT/SFMT-neon.h +index 8790b8b60e..cffb3a514e 100644 +--- a/dep/SFMT/SFMT-neon.h ++++ b/dep/SFMT/SFMT-neon.h +@@ -10,6 +10,13 @@ + #ifndef SFMT_NEON_H + #define SFMT_NEON_H + ++#ifdef _MSC_VER ++// The .n128_u64 field is first. Combine pairs of 32-bit integers in little-endian order. ++#define sfmt_neon_init_uint32x4_t(w,x,y,z) { .n128_u32 = { (w), (x), (y), (z) } } ++#else ++#define sfmt_neon_init_uint32x4_t(w,x,y,z) { (w), (x), (y), (z) } ++#endif ++ + inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, + uint32x4_t c, uint32x4_t d); + +@@ -27,8 +34,8 @@ inline static void neon_recursion(uint32x4_t * r, uint32x4_t a, uint32x4_t b, + uint32x4_t c, uint32x4_t d) + { + uint32x4_t v, x, y, z; +- static const uint32x4_t vzero = {0,0,0,0}; +- static const uint32x4_t vmask = {SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4}; ++ static const uint32x4_t vzero = sfmt_neon_init_uint32x4_t(0, 0, 0, 0); ++ static const uint32x4_t vmask = sfmt_neon_init_uint32x4_t(SFMT_MSK1, SFMT_MSK2, SFMT_MSK3, SFMT_MSK4); + + #define rotate_bytes(A, B, C) vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(A),vreinterpretq_u8_u32(B),(C))) + |