/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2024- IBM Inc.
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
# GHASH is based on the Karatsuba multiplication method.
#
# Xi xor X1
#
# X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
# (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
# (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
# (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
# (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
# ( H.l, H, H.h)
# ( H^2.l, H^2, H^2.h)
# ( H^3.l, H^3, H^3.h)
# ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
# vs0 - round key 0
# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
#
# ===================================================================================
#
#include <asm/ppc_asm.h>
#include <linux/linkage.h>
# Let the assembler accept instructions from any PowerPC variant, so no
# specific CPU level has to be selected on the command line.
.machine "any"
# Everything below is emitted into the code section.
.text
# Store one general-purpose register as a doubleword.
#   GPR    - number of the register to store
#   OFFSET - byte displacement from the base register
#   FRAME  - number of the base register
.macro SAVE_GPR GPR, OFFSET, FRAME
	std	\GPR, \OFFSET(\FRAME)
.endm
# Store one vector register with stxv.
#   VRS    - vector register number; 32 is added because stxv takes a VSX
#            register number and vector register N is VSX register N + 32
#   OFFSET - byte displacement from the base register
#   FRAME  - number of the base register
.macro SAVE_VRS VRS, OFFSET, FRAME
	stxv	\VRS+32, \OFFSET(\FRAME)
.endm
# Reload one general-purpose register from a doubleword slot; the
# counterpart of SAVE_GPR.
#   GPR    - number of the register to load
#   OFFSET - byte displacement from the base register
#   FRAME  - number of the base register
.macro RESTORE_GPR GPR, OFFSET, FRAME
	ld	\GPR, \OFFSET(\FRAME)
.endm
# Reload one vector register with lxv; the counterpart of SAVE_VRS.
#   VRS    - vector register number; 32 is added because lxv takes a VSX
#            register number and vector register N is VSX register N + 32
#   OFFSET - byte displacement from the base register
#   FRAME  - number of the base register
.macro RESTORE_VRS VRS, OFFSET, FRAME
	lxv	\VRS+32, \OFFSET(\FRAME)
.endm
# Prologue helper: saves LR, allocates a 512-byte stack frame and stores
# GPRs 14-24 and vector registers 20-31 into it.
#
# Slots, relative to r1 after the frame has been allocated:
#   GPR 14-24 occupy bytes 112..199 (8 bytes each)
#   VR 20-31 occupy bytes 256..447 (16 bytes each)
# LR is written to offset 16 of the frame that was current on entry.
#
# Clobbers r0 (holds the LR value) and r9 (left equal to r1 + 256).
.macro SAVE_REGS
	mflr	0
	std	0, 16(1)		# LR slot of the frame current on entry
	stdu	1, -512(1)		# allocate the frame, store the back chain

	# General-purpose registers, addressed from r1.
	SAVE_GPR	14, 112, 1
	SAVE_GPR	15, 120, 1
	SAVE_GPR	16, 128, 1
	SAVE_GPR	17, 136, 1
	SAVE_GPR	18, 144, 1
	SAVE_GPR	19, 152, 1
	SAVE_GPR	20, 160, 1
	SAVE_GPR	21, 168, 1
	SAVE_GPR	22, 176, 1
	SAVE_GPR	23, 184, 1
	SAVE_GPR	24, 192, 1

	# Vector registers, addressed from r9 = r1 + 256.
	addi	9, 1, 256
	SAVE_VRS	20, 0, 9
	SAVE_VRS	21, 16, 9
	SAVE_VRS	22, 32, 9
	SAVE_VRS	23, 48, 9
	SAVE_VRS	24, 64, 9
	SAVE_VRS	25, 80, 9
	SAVE_VRS	26, 96, 9
	SAVE_VRS	27, 112, 9
	SAVE_VRS	28, 128, 9
	SAVE_VRS	29, 144, 9
	SAVE_VRS	30, 160, 9
	SAVE_VRS	31, 176, 9
.endm	# SAVE_REGS
.macro RESTORE_REGS
addi 9, 1, 256
RESTORE_VRS 20, 0, 9
RESTORE_VRS 21, 16, 9
RESTORE_VRS 22, 32, 9
RESTORE_VRS 23, 48, 9
RESTORE_VRS 24, 64, 9
RESTORE_VRS 25, 80, 9
<