diff options
| author | Eric Biggers <ebiggers@kernel.org> | 2025-11-12 10:14:33 -0800 |
|---|---|---|
| committer | Eric Biggers <ebiggers@kernel.org> | 2025-11-12 10:15:07 -0800 |
| commit | 5dc8d277520be6f0be11f36712e557167b3964c8 (patch) | |
| tree | d437f6e6b5cfcdd3a560ad3b9605ddcdc67b874e /lib | |
| parent | 8dcac98a477c299f1f2347187979f508e98d6308 (diff) | |
| parent | 4fa617cc6851488759f79dc8f66822ae332d98f0 (diff) | |
Merge tag 'arm64-fpsimd-on-stack-for-v6.19' into libcrypto-fpsimd-on-stack
Pull fpsimd-on-stack changes from Ard Biesheuvel:
"Shared tag/branch for arm64 FP/SIMD changes going through libcrypto"
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/crc/arm/crc-t10dif.h | 19 | ||||
| -rw-r--r-- | lib/crc/arm/crc32.h | 11 | ||||
| -rw-r--r-- | lib/crc/arm64/crc-t10dif.h | 19 | ||||
| -rw-r--r-- | lib/crc/arm64/crc32.h | 16 | ||||
| -rw-r--r-- | lib/raid6/neon.c | 17 | ||||
| -rw-r--r-- | lib/raid6/recov_neon.c | 15 |
6 files changed, 35 insertions, 62 deletions
diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h index 63441de5e3f1..afc0ebf97f19 100644 --- a/lib/crc/arm/crc-t10dif.h +++ b/lib/crc/arm/crc-t10dif.h @@ -5,7 +5,6 @@ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> */ -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if (static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_neon) && - likely(may_use_simd())) { + static_branch_likely(&have_neon)) { u8 buf[16] __aligned(16); - kernel_neon_begin(); - crc_t10dif_pmull8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h index 7b76f52f6907..f33de6b22cd4 100644 --- a/lib/crc/arm/crc32.h +++ b/lib/crc/arm/crc32.h @@ -8,7 +8,6 @@ #include <linux/cpufeature.h> #include <asm/hwcap.h> -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); @@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_pmull_le(p, n, crc); p += n; len -= n; } @@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32c_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_pmull_le(p, n, crc); p += n; len -= n; } diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h index f88db2971805..b8338139ed77 100644 --- a/lib/crc/arm64/crc-t10dif.h +++ b/lib/crc/arm64/crc-t10dif.h @@ -7,7 +7,6 @@ #include <linux/cpufeature.h> -#include <asm/neon.h> #include <asm/simd.h> static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); @@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if (static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull_p64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull_p64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_asimd) && - likely(may_use_simd())) { + static_branch_likely(&have_asimd)) { u8 buf[16]; - kernel_neon_begin(); - crc_t10dif_pmull_p8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull_p8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h index 31e649cd40a2..1939a5dee477 100644 --- a/lib/crc/arm64/crc32.h +++ b/lib/crc/arm64/crc32.h @@ -2,7 +2,6 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/neon.h> #include <asm/simd.h> // The minimum input length to consider the 4-way interleaved code path @@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32c_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_be_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_be_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 0a2e76035ea9..6d9474ce6da9 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -8,10 +8,9 @@ #include <linux/raid/pq.h> #ifdef __KERNEL__ -#include <asm/neon.h> +#include <asm/simd.h> #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -32,10 +31,9 @@ { \ void raid6_neon ## _n ## _gen_syndrome_real(int, \ unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _gen_syndrome_real(disks, \ + scoped_ksimd() \ + raid6_neon ## _n ## _gen_syndrome_real(disks, \ (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ } \ static void raid6_neon ## _n ## _xor_syndrome(int disks, \ int start, int stop, \ @@ -43,10 +41,9 @@ { \ void raid6_neon ## _n ## _xor_syndrome_real(int, \ int, int, unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _xor_syndrome_real(disks, \ - start, stop, (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ + scoped_ksimd() \ + raid6_neon ## _n ## _xor_syndrome_real(disks, \ + start, stop, (unsigned long)bytes, ptrs);\ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 70e1404c1512..9d99aeabd31a 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -7,11 +7,10 @@ #include <linux/raid/pq.h> #ifdef __KERNEL__ -#include <asm/neon.h> +#include <asm/simd.h> #include "neon.h" #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; - kernel_neon_begin(); - __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); - kernel_neon_end(); + scoped_ksimd() + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); } static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, @@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, /* Now, pick the proper data tables */ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; - kernel_neon_begin(); - __raid6_datap_recov_neon(bytes, p, q, dq, qmul); - kernel_neon_end(); + scoped_ksimd() + __raid6_datap_recov_neon(bytes, p, q, dq, qmul); } const struct raid6_recov_calls raid6_recov_neon = { |
