diff options
| author | Eric Biggers <ebiggers@kernel.org> | 2025-07-12 16:22:59 -0700 |
|---|---|---|
| committer | Eric Biggers <ebiggers@kernel.org> | 2025-07-14 11:11:48 -0700 |
| commit | 00d549bb89e471b7df550459fcb51ffbded39cbf (patch) | |
| tree | eb4f699859d57af7537194687503bf92841d1e09 /arch/arm64/crypto | |
| parent | 70cb6ca58fddb02e269fe743ba75d53d577b5b1c (diff) | |
lib/crypto: arm64/sha1: Migrate optimized code into library
Instead of exposing the arm64-optimized SHA-1 code via arm64-specific
crypto_shash algorithms, just implement the sha1_blocks()
library function. This is much simpler, it makes the SHA-1 library
functions arm64-optimized, and it fixes the longstanding issue where
the arm64-optimized SHA-1 code was disabled by default. SHA-1 still
remains available through crypto_shash, but individual architectures no
longer need to handle it.
Remove support for SHA-1 finalization from assembly code, since the
library does not yet support architecture-specific overrides of the
finalization. (Support for that has been omitted for now, for
simplicity and because usually it isn't performance-critical.)
To match sha1_blocks(), change the type of the nblocks parameter and the
return value of __sha1_ce_transform() from int to size_t. Update the
assembly code accordingly.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250712232329.818226-9-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'arch/arm64/crypto')
| -rw-r--r-- | arch/arm64/crypto/Kconfig | 11 | ||||
| -rw-r--r-- | arch/arm64/crypto/Makefile | 3 | ||||
| -rw-r--r-- | arch/arm64/crypto/sha1-ce-core.S | 150 | ||||
| -rw-r--r-- | arch/arm64/crypto/sha1-ce-glue.c | 118 |
4 files changed, 0 insertions, 282 deletions
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index a9ead99f72c2..3bb5b513d5ae 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -25,17 +25,6 @@ config CRYPTO_NHPOLY1305_NEON Architecture: arm64 using: - NEON (Advanced SIMD) extensions -config CRYPTO_SHA1_ARM64_CE - tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_HASH - select CRYPTO_SHA1 - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_SHA3_ARM64 tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 228101f125d5..a8b2cdbe202c 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -5,9 +5,6 @@ # Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> # -obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o -sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o - obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S deleted file mode 100644 index 9b1f2d82a6fe..000000000000 --- a/arch/arm64/crypto/sha1-ce-core.S +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .text - .arch armv8-a+crypto - - k0 .req v0 - k1 .req v1 - k2 .req v2 - k3 .req v3 - - t0 .req v4 - t1 .req v5 - - dga .req q6 - dgav .req v6 - dgb .req s7 - dgbv .req v7 - - dg0q .req q12 - dg0s .req s12 - dg0v .req v12 - dg1s .req s13 - dg1v .req v13 - dg2s .req s14 - - .macro add_only, op, ev, rc, s0, dg1 - .ifc \ev, ev - add t1.4s, v\s0\().4s, \rc\().4s - sha1h dg2s, dg0s - .ifnb \dg1 - sha1\op dg0q, \dg1, t0.4s - .else - 
sha1\op dg0q, dg1s, t0.4s - .endif - .else - .ifnb \s0 - add t0.4s, v\s0\().4s, \rc\().4s - .endif - sha1h dg1s, dg0s - sha1\op dg0q, dg2s, t1.4s - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1 v\s0\().4s, v\s3\().4s - .endm - - .macro loadrc, k, val, tmp - movz \tmp, :abs_g0_nc:\val - movk \tmp, :abs_g1:\val - dup \k, \tmp - .endm - - /* - * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - * int blocks) - */ -SYM_FUNC_START(__sha1_ce_transform) - /* load round constants */ - loadrc k0.4s, 0x5a827999, w6 - loadrc k1.4s, 0x6ed9eba1, w6 - loadrc k2.4s, 0x8f1bbcdc, w6 - loadrc k3.4s, 0xca62c1d6, w6 - - /* load state */ - ld1 {dgav.4s}, [x0] - ldr dgb, [x0, #16] - - /* load sha1_ce_state::finalize */ - ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] - - /* load input */ -0: ld1 {v8.4s-v11.4s}, [x1], #64 - sub w2, w2, #1 - -CPU_LE( rev32 v8.16b, v8.16b ) -CPU_LE( rev32 v9.16b, v9.16b ) -CPU_LE( rev32 v10.16b, v10.16b ) -CPU_LE( rev32 v11.16b, v11.16b ) - -1: add t0.4s, v8.4s, k0.4s - mov dg0v.16b, dgav.16b - - add_update c, ev, k0, 8, 9, 10, 11, dgb - add_update c, od, k0, 9, 10, 11, 8 - add_update c, ev, k0, 10, 11, 8, 9 - add_update c, od, k0, 11, 8, 9, 10 - add_update c, ev, k1, 8, 9, 10, 11 - - add_update p, od, k1, 9, 10, 11, 8 - add_update p, ev, k1, 10, 11, 8, 9 - add_update p, od, k1, 11, 8, 9, 10 - add_update p, ev, k1, 8, 9, 10, 11 - add_update p, od, k2, 9, 10, 11, 8 - - add_update m, ev, k2, 10, 11, 8, 9 - add_update m, od, k2, 11, 8, 9, 10 - add_update m, ev, k2, 8, 9, 10, 11 - add_update m, od, k2, 9, 10, 11, 8 - add_update m, ev, k3, 10, 11, 8, 9 - - add_update p, od, k3, 11, 8, 9, 10 - add_only p, ev, k3, 9 - add_only p, od, k3, 10 - add_only p, ev, k3, 11 - add_only p, od - - /* update state */ - add dgbv.2s, dgbv.2s, dg1v.2s - add dgav.4s, dgav.4s, dg0v.4s - - cbz w2, 2f - cond_yield 3f, x5, x6 - b 0b - - /* - 
* Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -2: cbz x4, 3f - ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x0, x4] - movi v9.2d, #0 - mov x8, #0x80000000 - movi v10.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d8, x8 - mov x4, #0 - mov v11.d[0], xzr - mov v11.d[1], x7 - b 1b - - /* store new state */ -3: st1 {dgav.4s}, [x0] - str dgb, [x0, #16] - mov w0, w2 - ret -SYM_FUNC_END(__sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c deleted file mode 100644 index 65b6980817e5..000000000000 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ /dev/null @@ -1,118 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - */ - -#include <asm/neon.h> -#include <asm/simd.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <crypto/sha1.h> -#include <crypto/sha1_base.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> - -MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("sha1"); - -struct sha1_ce_state { - struct sha1_state sst; - u32 finalize; -}; - -extern const u32 sha1_ce_offsetof_count; -extern const u32 sha1_ce_offsetof_finalize; - -asmlinkage int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - int blocks); - -static void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - int blocks) -{ - while (blocks) { - int rem; - - kernel_neon_begin(); - rem = __sha1_ce_transform(container_of(sst, - struct sha1_ce_state, - sst), src, blocks); - kernel_neon_end(); - src += (blocks - rem) * SHA1_BLOCK_SIZE; - blocks = 
rem; - } -} - -const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); -const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); - -static int sha1_ce_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - - sctx->finalize = 0; - return sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); -} - -static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct sha1_ce_state *sctx = shash_desc_ctx(desc); - bool finalized = false; - - /* - * Allow the asm code to perform the finalization if there is no - * partial data and the input is a round multiple of the block size. - */ - if (len >= SHA1_BLOCK_SIZE) { - unsigned int remain = len - round_down(len, SHA1_BLOCK_SIZE); - - finalized = !remain; - sctx->finalize = finalized; - sha1_base_do_update_blocks(desc, data, len, sha1_ce_transform); - data += len - remain; - len = remain; - } - if (!finalized) { - sctx->finalize = 0; - sha1_base_do_finup(desc, data, len, sha1_ce_transform); - } - return sha1_base_finish(desc, out); -} - -static struct shash_alg alg = { - .init = sha1_base_init, - .update = sha1_ce_update, - .finup = sha1_ce_finup, - .descsize = sizeof(struct sha1_ce_state), - .statesize = SHA1_STATE_SIZE, - .digestsize = SHA1_DIGEST_SIZE, - .base = { - .cra_name = "sha1", - .cra_driver_name = "sha1-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | - CRYPTO_AHASH_ALG_FINUP_MAX, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha1_ce_mod_init(void) -{ - return crypto_register_shash(&alg); -} - -static void __exit sha1_ce_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(SHA1, sha1_ce_mod_init); -module_exit(sha1_ce_mod_fini); |
