aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@kernel.org>2025-11-12 10:14:33 -0800
committerEric Biggers <ebiggers@kernel.org>2025-11-12 10:15:07 -0800
commit5dc8d277520be6f0be11f36712e557167b3964c8 (patch)
treed437f6e6b5cfcdd3a560ad3b9605ddcdc67b874e /lib
parent8dcac98a477c299f1f2347187979f508e98d6308 (diff)
parent4fa617cc6851488759f79dc8f66822ae332d98f0 (diff)
Merge tag 'arm64-fpsimd-on-stack-for-v6.19' into libcrypto-fpsimd-on-stack
Pull fpsimd-on-stack changes from Ard Biesheuvel: "Shared tag/branch for arm64 FP/SIMD changes going through libcrypto" Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/crc/arm/crc-t10dif.h19
-rw-r--r--lib/crc/arm/crc32.h11
-rw-r--r--lib/crc/arm64/crc-t10dif.h19
-rw-r--r--lib/crc/arm64/crc32.h16
-rw-r--r--lib/raid6/neon.c17
-rw-r--r--lib/raid6/recov_neon.c15
6 files changed, 35 insertions, 62 deletions
diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h
index 63441de5e3f1..afc0ebf97f19 100644
--- a/lib/crc/arm/crc-t10dif.h
+++ b/lib/crc/arm/crc-t10dif.h
@@ -5,7 +5,6 @@
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ scoped_ksimd()
+ return crc_t10dif_pmull64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_neon) &&
- likely(may_use_simd())) {
+ static_branch_likely(&have_neon)) {
u8 buf[16] __aligned(16);
- kernel_neon_begin();
- crc_t10dif_pmull8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h
index 7b76f52f6907..f33de6b22cd4 100644
--- a/lib/crc/arm/crc32.h
+++ b/lib/crc/arm/crc32.h
@@ -8,7 +8,6 @@
#include <linux/cpufeature.h>
#include <asm/hwcap.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
@@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_pmull_le(p, n, crc);
p += n;
len -= n;
}
@@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
- kernel_neon_begin();
- crc = crc32c_pmull_le(p, n, crc);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_pmull_le(p, n, crc);
p += n;
len -= n;
}
diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h
index f88db2971805..b8338139ed77 100644
--- a/lib/crc/arm64/crc-t10dif.h
+++ b/lib/crc/arm64/crc-t10dif.h
@@ -7,7 +7,6 @@
#include <linux/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
@@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
- if (likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc_t10dif_pmull_p64(crc, data, length);
- kernel_neon_end();
- return crc;
- }
+ scoped_ksimd()
+ return crc_t10dif_pmull_p64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
- static_branch_likely(&have_asimd) &&
- likely(may_use_simd())) {
+ static_branch_likely(&have_asimd)) {
u8 buf[16];
- kernel_neon_begin();
- crc_t10dif_pmull_p8(crc, data, length, buf);
- kernel_neon_end();
+ scoped_ksimd()
+ crc_t10dif_pmull_p8(crc, data, length, buf);
return crc_t10dif_generic(0, buf, sizeof(buf));
}
diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h
index 31e649cd40a2..1939a5dee477 100644
--- a/lib/crc/arm64/crc32.h
+++ b/lib/crc/arm64/crc32.h
@@ -2,7 +2,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/neon.h>
#include <asm/simd.h>
// The minimum input length to consider the 4-way interleaved code path
@@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32c_le_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32c_le_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
@@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
- kernel_neon_begin();
- crc = crc32_be_arm64_4way(crc, p, len);
- kernel_neon_end();
+ scoped_ksimd()
+ crc = crc32_be_arm64_4way(crc, p, len);
p += round_down(len, 64);
len %= 64;
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 0a2e76035ea9..6d9474ce6da9 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -8,10 +8,9 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
-#include <asm/neon.h>
+#include <asm/simd.h>
#else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
@@ -32,10 +31,9 @@
{ \
void raid6_neon ## _n ## _gen_syndrome_real(int, \
unsigned long, void**); \
- kernel_neon_begin(); \
- raid6_neon ## _n ## _gen_syndrome_real(disks, \
+ scoped_ksimd() \
+ raid6_neon ## _n ## _gen_syndrome_real(disks, \
(unsigned long)bytes, ptrs); \
- kernel_neon_end(); \
} \
static void raid6_neon ## _n ## _xor_syndrome(int disks, \
int start, int stop, \
@@ -43,10 +41,9 @@
{ \
void raid6_neon ## _n ## _xor_syndrome_real(int, \
int, int, unsigned long, void**); \
- kernel_neon_begin(); \
- raid6_neon ## _n ## _xor_syndrome_real(disks, \
- start, stop, (unsigned long)bytes, ptrs); \
- kernel_neon_end(); \
+ scoped_ksimd() \
+ raid6_neon ## _n ## _xor_syndrome_real(disks, \
+ start, stop, (unsigned long)bytes, ptrs);\
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c
index 70e1404c1512..9d99aeabd31a 100644
--- a/lib/raid6/recov_neon.c
+++ b/lib/raid6/recov_neon.c
@@ -7,11 +7,10 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
-#include <asm/neon.h>
+#include <asm/simd.h>
#include "neon.h"
#else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
@@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
raid6_gfexp[failb]]];
- kernel_neon_begin();
- __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
- kernel_neon_end();
+ scoped_ksimd()
+ __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
}
static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
@@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
/* Now, pick the proper data tables */
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
- kernel_neon_begin();
- __raid6_datap_recov_neon(bytes, p, q, dq, qmul);
- kernel_neon_end();
+ scoped_ksimd()
+ __raid6_datap_recov_neon(bytes, p, q, dq, qmul);
}
const struct raid6_recov_calls raid6_recov_neon = {