authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-06 14:01:20 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-06 14:01:20 -0800
commit509d3f45847627f4c5cdce004c3ec79262b5239c (patch)
tree3f5d650b393eeb0e560f78958bb20d6645ca55e0 /lib
parent09670b8c38b37bc2d6fc5d01fa7e02c38f7adf36 (diff)
parentaa514a297a0c175239f24a2e582ebd37f0727494 (diff)
Merge tag 'mm-nonmm-stable-2025-12-06-11-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:

 - "panic: sys_info: Refactor and fix a potential issue" (Andy
   Shevchenko) fixes a build issue and does some cleanup in
   lib/sys_info.c

 - "Implement mul_u64_u64_div_u64_roundup()" (David Laight) enhances
   the 64-bit math code on behalf of a PWM driver and beefs up the
   test module for these library functions

 - "scripts/gdb/symbols: make BPF debug info available to GDB" (Ilya
   Leoshkevich) makes BPF symbol names, sizes, and line numbers
   available to the GDB debugger

 - "Enable hung_task and lockup cases to dump system info on demand"
   (Feng Tang) adds a sysctl which can be used to cause additional
   info dumping when the hung-task and lockup detectors fire

 - "lib/base64: add generic encoder/decoder, migrate users" (Kuan-Wei
   Chiu) adds a general base64 encoder/decoder to lib/ and migrates
   several users away from their private implementations

 - "rbtree: inline rb_first() and rb_last()" (Eric Dumazet) makes TCP
   a little faster

 - "liveupdate: Rework KHO for in-kernel users" (Pasha Tatashin)
   reworks the KEXEC Handover interfaces in preparation for the Live
   Update Orchestrator (LUO), and possibly for other future clients

 - "kho: simplify state machine and enable dynamic updates" (Pasha
   Tatashin) increases the flexibility of KEXEC Handover, also in
   preparation for LUO

 - "Live Update Orchestrator" (Pasha Tatashin) is a major new feature
   targeted at cloud environments. Quoting the cover letter:

      "This series introduces the Live Update Orchestrator, a kernel
       subsystem designed to facilitate live kernel updates using a
       kexec-based reboot. This capability is critical for cloud
       environments, allowing hypervisors to be updated with minimal
       downtime for running virtual machines.

       LUO achieves this by preserving the state of selected
       resources, such as memory, devices and their dependencies,
       across the kernel transition. As a key feature, this series
       includes support for preserving memfd file descriptors, which
       allows critical in-memory data, such as guest RAM or any other
       large memory region, to be maintained in RAM across the kexec
       reboot."

   Mike Rapoport merits a mention here, for his extensive review and
   testing work.

 - "kexec: reorganize kexec and kdump sysfs" (Sourabh Jain) moves the
   kexec and kdump sysfs entries from /sys/kernel/ to
   /sys/kernel/kexec/ and adds back-compatibility symlinks which can
   hopefully be removed one day

 - "kho: fixes for vmalloc restoration" (Mike Rapoport) fixes a BUG
   which was being hit during KHO restoration of vmalloc() regions

* tag 'mm-nonmm-stable-2025-12-06-11-14' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (139 commits)
  calibrate: update header inclusion
  Reinstate "resource: avoid unnecessary lookups in find_next_iomem_res()"
  vmcoreinfo: track and log recoverable hardware errors
  kho: fix restoring of contiguous ranges of order-0 pages
  kho: kho_restore_vmalloc: fix initialization of pages array
  MAINTAINERS: TPM DEVICE DRIVER: update the W-tag
  init: replace simple_strtoul with kstrtoul to improve lpj_setup
  KHO: fix boot failure due to kmemleak access to non-PRESENT pages
  Documentation/ABI: new kexec and kdump sysfs interface
  Documentation/ABI: mark old kexec sysfs deprecated
  kexec: move sysfs entries to /sys/kernel/kexec
  test_kho: always print restore status
  kho: free chunks using free_page() instead of kfree()
  selftests/liveupdate: add kexec test for multiple and empty sessions
  selftests/liveupdate: add simple kexec-based selftest for LUO
  selftests/liveupdate: add userspace API selftests
  docs: add documentation for memfd preservation via LUO
  mm: memfd_luo: allow preserving memfd
  liveupdate: luo_file: add private argument to store runtime state
  mm: shmem: export some functions to internal.h
  ...
Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug                    |  63
-rw-r--r--  lib/base64.c                         | 189
-rw-r--r--  lib/dynamic_debug.c                  |   1
-rw-r--r--  lib/math/div64.c                     | 185
-rw-r--r--  lib/math/test_mul_u64_u64_div_u64.c  | 191
-rw-r--r--  lib/plist.c                          |   4
-rw-r--r--  lib/ratelimit.c                      |   2
-rw-r--r--  lib/rbtree.c                         |  29
-rw-r--r--  lib/sys_info.c                       | 169
-rw-r--r--  lib/test_kho.c                       | 140
-rw-r--r--  lib/tests/Makefile                   |   1
-rw-r--r--  lib/tests/base64_kunit.c             | 294
-rw-r--r--  lib/usercopy.c                       |   4
-rw-r--r--  lib/xxhash.c                         |  29
-rw-r--r--  lib/xz/xz_dec_bcj.c                  |  95
-rw-r--r--  lib/xz/xz_private.h                  |   4
16 files changed, 927 insertions, 473 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c2654075377e..ba36939fda79 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -342,8 +342,7 @@ config DEBUG_INFO_COMPRESSED_ZLIB
depends on $(cc-option,-gz=zlib)
depends on $(ld-option,--compress-debug-sections=zlib)
help
- Compress the debug information using zlib. Requires GCC 5.0+ or Clang
- 5.0+, binutils 2.26+, and zlib.
+ Compress the debug information using zlib.
Users of dpkg-deb via debian/rules may find an increase in
size of their debug .deb packages with this config set, due to the
@@ -493,23 +492,23 @@ config DEBUG_SECTION_MISMATCH
bool "Enable full Section mismatch analysis"
depends on CC_IS_GCC
help
- The section mismatch analysis checks if there are illegal
- references from one section to another section.
- During linktime or runtime, some sections are dropped;
- any use of code/data previously in these sections would
- most likely result in an oops.
- In the code, functions and variables are annotated with
- __init,, etc. (see the full list in include/linux/init.h),
- which results in the code/data being placed in specific sections.
+ The section mismatch analysis checks if there are illegal references
+ from one section to another. During linktime or runtime, some
+ sections are dropped; any use of code/data previously in these
+ sections would most likely result in an oops.
+
+ In the code, functions and variables are annotated with __init,
+ __initdata, and so on (see the full list in include/linux/init.h).
+ This directs the toolchain to place code/data in specific sections.
+
The section mismatch analysis is always performed after a full
- kernel build, and enabling this option causes the following
- additional step to occur:
- - Add the option -fno-inline-functions-called-once to gcc commands.
- When inlining a function annotated with __init in a non-init
- function, we would lose the section information and thus
- the analysis would not catch the illegal reference.
- This option tells gcc to inline less (but it does result in
- a larger kernel).
+ kernel build, and enabling this option causes the option
+ -fno-inline-functions-called-once to be added to gcc commands.
+
+ However, when inlining a function annotated with __init in
+ a non-init function, we would lose the section information and thus
+ the analysis would not catch the illegal reference. This option
+ tells gcc to inline less (but it does result in a larger kernel).
config SECTION_MISMATCH_WARN_ONLY
bool "Make section mismatch errors non-fatal"
@@ -1260,12 +1259,13 @@ config DEFAULT_HUNG_TASK_TIMEOUT
Keeping the default should be fine in most cases.
config BOOTPARAM_HUNG_TASK_PANIC
- bool "Panic (Reboot) On Hung Tasks"
+ int "Number of hung tasks to trigger kernel panic"
depends on DETECT_HUNG_TASK
+ default 0
help
- Say Y here to enable the kernel to panic on "hung tasks",
- which are bugs that cause the kernel to leave a task stuck
- in uninterruptible "D" state.
+ When set to a non-zero value, a kernel panic will be triggered
+ if the number of hung tasks found during a single scan reaches
+ this value.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
@@ -2817,8 +2817,25 @@ config CMDLINE_KUNIT_TEST
If unsure, say N.
+config BASE64_KUNIT
+ tristate "KUnit test for base64 decoding and encoding" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds the base64 unit tests.
+
+ The tests cover the encoding and decoding logic of Base64 functions
+ in the kernel.
+ In addition to correctness checks, simple performance benchmarks
+ for both encoding and decoding are also included.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config BITS_TEST
- tristate "KUnit test for bits.h" if !KUNIT_ALL_TESTS
+ tristate "KUnit test for bit functions and macros" if !KUNIT_ALL_TESTS
depends on KUNIT
default KUNIT_ALL_TESTS
help
diff --git a/lib/base64.c b/lib/base64.c
index b736a7a431c5..41961a444028 100644
--- a/lib/base64.c
+++ b/lib/base64.c
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * base64.c - RFC4648-compliant base64 encoding
+ * base64.c - Base64 with support for multiple variants
*
* Copyright (c) 2020 Hannes Reinecke, SUSE
*
* Based on the base64url routines from fs/crypto/fname.c
- * (which are using the URL-safe base64 encoding),
- * modified to use the standard coding table from RFC4648 section 4.
+ * (which are using the URL-safe Base64 encoding),
+ * modified to support multiple Base64 variants.
*/
#include <linux/kernel.h>
@@ -15,89 +15,170 @@
#include <linux/string.h>
#include <linux/base64.h>
-static const char base64_table[65] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char base64_tables[][65] = {
+ [BASE64_STD] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
+ [BASE64_URLSAFE] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
+ [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
+};
+/*
+ * Initialize the base64 reverse mapping for a single character
+ * This macro maps a character to its corresponding base64 value,
+ * returning -1 if the character is invalid.
+ * char 'A'-'Z' maps to 0-25, 'a'-'z' maps to 26-51, '0'-'9' maps to 52-61,
+ * ch_62 maps to 62, ch_63 maps to 63, and other characters return -1
+ */
+#define INIT_1(v, ch_62, ch_63) \
+ [v] = (v) >= 'A' && (v) <= 'Z' ? (v) - 'A' \
+ : (v) >= 'a' && (v) <= 'z' ? (v) - 'a' + 26 \
+ : (v) >= '0' && (v) <= '9' ? (v) - '0' + 52 \
+ : (v) == (ch_62) ? 62 : (v) == (ch_63) ? 63 : -1
+
+/*
+ * Recursive macros to generate multiple Base64 reverse mapping table entries.
+ * Each macro generates a sequence of entries in the lookup table:
+ * INIT_2 generates 2 entries, INIT_4 generates 4, INIT_8 generates 8, and so on up to INIT_32.
+ */
+#define INIT_2(v, ...) INIT_1(v, __VA_ARGS__), INIT_1((v) + 1, __VA_ARGS__)
+#define INIT_4(v, ...) INIT_2(v, __VA_ARGS__), INIT_2((v) + 2, __VA_ARGS__)
+#define INIT_8(v, ...) INIT_4(v, __VA_ARGS__), INIT_4((v) + 4, __VA_ARGS__)
+#define INIT_16(v, ...) INIT_8(v, __VA_ARGS__), INIT_8((v) + 8, __VA_ARGS__)
+#define INIT_32(v, ...) INIT_16(v, __VA_ARGS__), INIT_16((v) + 16, __VA_ARGS__)
+
+#define BASE64_REV_INIT(ch_62, ch_63) { \
+ [0 ... 0x1f] = -1, \
+ INIT_32(0x20, ch_62, ch_63), \
+ INIT_32(0x40, ch_62, ch_63), \
+ INIT_32(0x60, ch_62, ch_63), \
+ [0x80 ... 0xff] = -1 }
+
+static const s8 base64_rev_maps[][256] = {
+ [BASE64_STD] = BASE64_REV_INIT('+', '/'),
+ [BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'),
+ [BASE64_IMAP] = BASE64_REV_INIT('+', ',')
+};
+
+#undef BASE64_REV_INIT
+#undef INIT_32
+#undef INIT_16
+#undef INIT_8
+#undef INIT_4
+#undef INIT_2
+#undef INIT_1
/**
- * base64_encode() - base64-encode some binary data
+ * base64_encode() - Base64-encode some binary data
* @src: the binary data to encode
* @srclen: the length of @src in bytes
- * @dst: (output) the base64-encoded string. Not NUL-terminated.
+ * @dst: (output) the Base64-encoded string. Not NUL-terminated.
+ * @padding: whether to append '=' padding characters
+ * @variant: which base64 variant to use
*
- * Encodes data using base64 encoding, i.e. the "Base 64 Encoding" specified
- * by RFC 4648, including the '='-padding.
+ * Encodes data using the selected Base64 variant.
*
- * Return: the length of the resulting base64-encoded string in bytes.
+ * Return: the length of the resulting Base64-encoded string in bytes.
*/
-int base64_encode(const u8 *src, int srclen, char *dst)
+int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum base64_variant variant)
{
u32 ac = 0;
- int bits = 0;
- int i;
char *cp = dst;
+ const char *base64_table = base64_tables[variant];
- for (i = 0; i < srclen; i++) {
- ac = (ac << 8) | src[i];
- bits += 8;
- do {
- bits -= 6;
- *cp++ = base64_table[(ac >> bits) & 0x3f];
- } while (bits >= 6);
- }
- if (bits) {
- *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
- bits -= 6;
+ while (srclen >= 3) {
+ ac = src[0] << 16 | src[1] << 8 | src[2];
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ *cp++ = base64_table[(ac >> 6) & 0x3f];
+ *cp++ = base64_table[ac & 0x3f];
+
+ src += 3;
+ srclen -= 3;
}
- while (bits < 0) {
- *cp++ = '=';
- bits += 2;
+
+ switch (srclen) {
+ case 2:
+ ac = src[0] << 16 | src[1] << 8;
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ *cp++ = base64_table[(ac >> 6) & 0x3f];
+ if (padding)
+ *cp++ = '=';
+ break;
+ case 1:
+ ac = src[0] << 16;
+ *cp++ = base64_table[ac >> 18];
+ *cp++ = base64_table[(ac >> 12) & 0x3f];
+ if (padding) {
+ *cp++ = '=';
+ *cp++ = '=';
+ }
+ break;
}
return cp - dst;
}
EXPORT_SYMBOL_GPL(base64_encode);
/**
- * base64_decode() - base64-decode a string
+ * base64_decode() - Base64-decode a string
* @src: the string to decode. Doesn't need to be NUL-terminated.
* @srclen: the length of @src in bytes
* @dst: (output) the decoded binary data
+ * @padding: whether to append '=' padding characters
+ * @variant: which base64 variant to use
*
- * Decodes a string using base64 encoding, i.e. the "Base 64 Encoding"
- * specified by RFC 4648, including the '='-padding.
- *
- * This implementation hasn't been optimized for performance.
+ * Decodes a string using the selected Base64 variant.
*
* Return: the length of the resulting decoded binary data in bytes,
- * or -1 if the string isn't a valid base64 string.
+ * or -1 if the string isn't a valid Base64 string.
*/
-int base64_decode(const char *src, int srclen, u8 *dst)
+int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
{
- u32 ac = 0;
- int bits = 0;
- int i;
u8 *bp = dst;
+ s8 input[4];
+ s32 val;
+ const u8 *s = (const u8 *)src;
+ const s8 *base64_rev_tables = base64_rev_maps[variant];
- for (i = 0; i < srclen; i++) {
- const char *p = strchr(base64_table, src[i]);
+ while (srclen >= 4) {
+ input[0] = base64_rev_tables[s[0]];
+ input[1] = base64_rev_tables[s[1]];
+ input[2] = base64_rev_tables[s[2]];
+ input[3] = base64_rev_tables[s[3]];
- if (src[i] == '=') {
- ac = (ac << 6);
- bits += 6;
- if (bits >= 8)
- bits -= 8;
- continue;
- }
- if (p == NULL || src[i] == 0)
- return -1;
- ac = (ac << 6) | (p - base64_table);
- bits += 6;
- if (bits >= 8) {
- bits -= 8;
- *bp++ = (u8)(ac >> bits);
+ val = input[0] << 18 | input[1] << 12 | input[2] << 6 | input[3];
+
+ if (unlikely(val < 0)) {
+ if (!padding || srclen != 4 || s[3] != '=')
+ return -1;
+ padding = 0;
+ srclen = s[2] == '=' ? 2 : 3;
+ break;
}
+
+ *bp++ = val >> 16;
+ *bp++ = val >> 8;
+ *bp++ = val;
+
+ s += 4;
+ srclen -= 4;
}
- if (ac & ((1 << bits) - 1))
+
+ if (likely(!srclen))
+ return bp - dst;
+ if (padding || srclen == 1)
return -1;
+
+ val = (base64_rev_tables[s[0]] << 12) | (base64_rev_tables[s[1]] << 6);
+ *bp++ = val >> 10;
+
+ if (srclen == 2) {
+ if (val & 0x800003ff)
+ return -1;
+ } else {
+ val |= base64_rev_tables[s[2]];
+ if (val & 0x80000003)
+ return -1;
+ *bp++ = val >> 2;
+ }
return bp - dst;
}
EXPORT_SYMBOL_GPL(base64_decode);
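
[Editor's note: a minimal usage sketch of the reworked interface, based only
on the signatures and enum values visible in this diff. The exact header
contents, required output-buffer headroom, and the example function itself
are assumptions, not part of the patch.]

#include <linux/base64.h>

static int base64_roundtrip_example(void)
{
	const u8 raw[4] = { 0xde, 0xad, 0xbe, 0xef };
	char enc[8];	/* 4 input bytes encode to 8 chars with '=' padding */
	u8 dec[4];
	int enc_len, dec_len;

	/* Standard alphabet, '=' padding appended. */
	enc_len = base64_encode(raw, sizeof(raw), enc, true, BASE64_STD);

	/* Decoding with padding enabled expects the trailing '='s. */
	dec_len = base64_decode(enc, enc_len, dec, true, BASE64_STD);

	return dec_len < 0 ? -1 : 0;
}

The URL-safe and IMAP variants differ only in the characters used for
values 62 and 63 ('-'/'_' and '+'/',' respectively, per the tables above).
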
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 5a007952f7f2..7d7892e57a01 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -95,6 +95,7 @@ static const struct { unsigned flag:8; char opt_char; } opt_array[] = {
{ _DPRINTK_FLAGS_INCL_SOURCENAME, 's' },
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
{ _DPRINTK_FLAGS_INCL_TID, 't' },
+ { _DPRINTK_FLAGS_INCL_STACK, 'd' },
{ _DPRINTK_FLAGS_NONE, '_' },
};
diff --git a/lib/math/div64.c b/lib/math/div64.c
index bf77b9843175..d1e92ea24fce 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -177,94 +177,157 @@ EXPORT_SYMBOL(div64_s64);
* Iterative div/mod for use when dividend is not expected to be much
* bigger than divisor.
*/
+#ifndef iter_div_u64_rem
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
return __iter_div_u64_rem(dividend, divisor, remainder);
}
EXPORT_SYMBOL(iter_div_u64_rem);
+#endif
-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
-{
- if (ilog2(a) + ilog2(b) <= 62)
- return div64_u64(a * b, c);
+#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)
-#if defined(__SIZEOF_INT128__)
+#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)
+#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
+static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
+{
/* native 64x64=128 bits multiplication */
- u128 prod = (u128)a * b;
- u64 n_lo = prod, n_hi = prod >> 64;
+ u128 prod = (u128)a * b + c;
+ *p_lo = prod;
+ return prod >> 64;
+}
#else
-
- /* perform a 64x64=128 bits multiplication manually */
- u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
+static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
+{
+ /* perform a 64x64=128 bits multiplication in 32bit chunks */
u64 x, y, z;
- x = (u64)a_lo * b_lo;
- y = (u64)a_lo * b_hi + (u32)(x >> 32);
- z = (u64)a_hi * b_hi + (u32)(y >> 32);
- y = (u64)a_hi * b_lo + (u32)y;
- z += (u32)(y >> 32);
- x = (y << 32) + (u32)x;
-
- u64 n_lo = x, n_hi = z;
+ /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
+ x = mul_add(a, b, c);
+ y = mul_add(a, b >> 32, c >> 32);
+ y = add_u64_u32(y, x >> 32);
+ z = mul_add(a >> 32, b >> 32, y >> 32);
+ y = mul_add(a >> 32, b, y);
+ *p_lo = (y << 32) + (u32)x;
+ return add_u64_u32(z, y >> 32);
+}
+#endif
+#ifndef BITS_PER_ITER
+#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif
- /* make sure c is not zero, trigger runtime exception otherwise */
- if (unlikely(c == 0)) {
- unsigned long zero = 0;
+#if BITS_PER_ITER == 32
+#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
+#define add_u64_long(a, b) ((a) + (b))
+#else
+#undef BITS_PER_ITER
+#define BITS_PER_ITER 16
+static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
+{
+ u64 n_lo = mul_add(a, b, c);
+ u64 n_med = mul_add(a >> 32, b, c >> 32);
- OPTIMIZER_HIDE_VAR(zero);
- return ~0UL/zero;
- }
+ n_med = add_u64_u32(n_med, n_lo >> 32);
+ *p_lo = n_med << 32 | (u32)n_lo;
+ return n_med >> 32;
+}
+
+#define add_u64_long(a, b) add_u64_u32(a, b)
+#endif
+
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
+{
+ unsigned long d_msig, q_digit;
+ unsigned int reps, d_z_hi;
+ u64 quotient, n_lo, n_hi;
+ u32 overflow;
- int shift = __builtin_ctzll(c);
+ n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
- /* try reducing the fraction in case the dividend becomes <= 64 bits */
- if ((n_hi >> shift) == 0) {
- u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo;
+ if (!n_hi)
+ return div64_u64(n_lo, d);
- return div64_u64(n, c >> shift);
- /*
- * The remainder value if needed would be:
- * res = div64_u64_rem(n, c >> shift, &rem);
- * rem = (rem << shift) + (n_lo - (n << shift));
- */
- }
+ if (unlikely(n_hi >= d)) {
+ /* trigger runtime exception if divisor is zero */
+ if (d == 0) {
+ unsigned long zero = 0;
- if (n_hi >= c) {
+ OPTIMIZER_HIDE_VAR(zero);
+ return ~0UL/zero;
+ }
/* overflow: result is unrepresentable in a u64 */
- return -1;
+ return ~0ULL;
}
- /* Do the full 128 by 64 bits division */
-
- shift = __builtin_clzll(c);
- c <<= shift;
+ /* Left align the divisor, shifting the dividend to match */
+ d_z_hi = __builtin_clzll(d);
+ if (d_z_hi) {
+ d <<= d_z_hi;
+ n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
+ n_lo <<= d_z_hi;
+ }
- int p = 64 + shift;
- u64 res = 0;
- bool carry;
+ reps = 64 / BITS_PER_ITER;
+ /* Optimise loop count for small dividends */
+ if (!(u32)(n_hi >> 32)) {
+ reps -= 32 / BITS_PER_ITER;
+ n_hi = n_hi << 32 | n_lo >> 32;
+ n_lo <<= 32;
+ }
+#if BITS_PER_ITER == 16
+ if (!(u32)(n_hi >> 48)) {
+ reps--;
+ n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
+ n_lo <<= 16;
+ }
+#endif
- do {
- carry = n_hi >> 63;
- shift = carry ? 1 : __builtin_clzll(n_hi);
- if (p < shift)
- break;
- p -= shift;
- n_hi <<= shift;
- n_hi |= n_lo >> (64 - shift);
- n_lo <<= shift;
- if (carry || (n_hi >= c)) {
- n_hi -= c;
- res |= 1ULL << p;
+ /* Invert the dividend so we can use add instead of subtract. */
+ n_lo = ~n_lo;
+ n_hi = ~n_hi;
+
+ /*
+ * Get the most significant BITS_PER_ITER bits of the divisor.
+ * This is used to get a low 'guestimate' of the quotient digit.
+ */
+ d_msig = (d >> (64 - BITS_PER_ITER)) + 1;
+
+ /*
+ * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
+ * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
+ * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
+ */
+ quotient = 0;
+ while (reps--) {
+ q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
+ /* Shift 'n' left to align with the product q_digit * d */
+ overflow = n_hi >> (64 - BITS_PER_ITER);
+ n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
+ n_lo <<= BITS_PER_ITER;
+ /* Add product to negated divisor */
+ overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
+ /* Adjust for the q_digit 'guestimate' being low */
+ while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
+ q_digit++;
+ n_hi += d;
+ overflow += n_hi < d;
}
- } while (n_hi);
- /* The remainder value if needed would be n_hi << p */
+ quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
+ }
- return res;
+ /*
+ * The above only ensures the remainder doesn't overflow,
+ * it can still be possible to add (aka subtract) another copy
+ * of the divisor.
+ */
+ if ((n_hi + d) > n_hi)
+ quotient++;
+ return quotient;
}
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+#if !defined(test_mul_u64_add_u64_div_u64)
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
+#endif
#endif
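
[Editor's note: for reference, a hedged userspace sketch of the value the
routine above computes, written with a native 128-bit type where the
compiler provides one. This mirrors the __SIZEOF_INT128__ fast path and is
illustrative only; the long-division code above exists precisely for
configurations where a native 128-bit divide is unavailable.]

#include <stdint.h>

static uint64_t ref_mul_u64_add_u64_div_u64(uint64_t a, uint64_t b,
					    uint64_t c, uint64_t d)
{
	/* Full 128-bit product plus addend; cannot overflow 128 bits. */
	unsigned __int128 n = (unsigned __int128)a * b + c;

	/* The kernel routine returns ~0ULL when the quotient exceeds 64 bits. */
	if (d && n / d > UINT64_MAX)
		return UINT64_MAX;

	return (uint64_t)(n / d);	/* traps on d == 0, as intended */
}
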
diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c
index 58d058de4e73..338d014f0c73 100644
--- a/lib/math/test_mul_u64_u64_div_u64.c
+++ b/lib/math/test_mul_u64_u64_div_u64.c
@@ -10,80 +10,141 @@
#include <linux/printk.h>
#include <linux/math64.h>
-typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params;
+typedef struct { u64 a; u64 b; u64 d; u64 result; uint round_up;} test_params;
static test_params test_values[] = {
/* this contains many edge values followed by a couple random values */
-{ 0xb, 0x7, 0x3, 0x19 },
-{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000 },
-{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001 },
-{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000 },
-{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000 },
-{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab },
-{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000 },
-{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff },
-{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 },
-{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554 },
-{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3 },
-{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2 },
-{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 },
-{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 },
-{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 },
-{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 },
-{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 },
-{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca },
-{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b },
-{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 },
-{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe },
-{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 },
-{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 },
-{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f },
-{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 },
-{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 },
+{ 0xb, 0x7, 0x3, 0x19, 1 },
+{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000, 0 },
+{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001, 0 },
+{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000, 1 },
+{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000, 1 },
+{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab, 0 },
+{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000, 1 },
+{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff, 1 },
+{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851, 1 },
+{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554, 1 },
+{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3, 1 },
+{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2, 1 },
+{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007, 1 },
+{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001, 0 },
+{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001, 1 },
+{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002, 1 },
+{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8, 1 },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca, 1 },
+{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b, 1 },
+{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9, 1 },
+{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe, 0 },
+{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18, 1 },
+{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35, 1 },
+{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f, 1 },
+{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961, 1 },
+{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216, 1 },
};
/*
* The above table can be verified with the following shell script:
- *
- * #!/bin/sh
- * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \
- * lib/math/test_mul_u64_u64_div_u64.c |
- * while read a b c r; do
- * expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc )
- * given=$( printf "%X\n" $r )
- * if [ "$expected" = "$given" ]; then
- * echo "$a * $b / $c = $r OK"
- * else
- * echo "$a * $b / $c = $r is wrong" >&2
- * echo "should be equivalent to 0x$expected" >&2
- * exit 1
- * fi
- * done
+
+#!/bin/sh
+sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4 \5/p' \
+ lib/math/test_mul_u64_u64_div_u64.c |
+while read a b d r e; do
+ expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $d | bc )
+ given=$( printf "%X\n" $r )
+ if [ "$expected" = "$given" ]; then
+ echo "$a * $b / $d = $r OK"
+ else
+ echo "$a * $b / $d = $r is wrong" >&2
+ echo "should be equivalent to 0x$expected" >&2
+ exit 1
+ fi
+ expected=$( printf "obase=16; ibase=16; (%X * %X + %X) / %X\n" $a $b $((d-1)) $d | bc )
+ given=$( printf "%X\n" $((r + e)) )
+ if [ "$expected" = "$given" ]; then
+ echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) OK"
+ else
+ echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) is wrong" >&2
+ echo "should be equivalent to 0x$expected" >&2
+ exit 1
+ fi
+done
+
*/
-static int __init test_init(void)
+static u64 test_mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d);
+#if __LONG_WIDTH__ >= 64
+#define TEST_32BIT_DIV
+static u64 test_mul_u64_add_u64_div_u64_32bit(u64 a, u64 b, u64 c, u64 d);
+#endif
+
+static int __init test_run(unsigned int fn_no, const char *fn_name)
{
+ u64 start_time;
+ int errors = 0;
+ int tests = 0;
int i;
- pr_info("Starting mul_u64_u64_div_u64() test\n");
+ start_time = ktime_get_ns();
for (i = 0; i < ARRAY_SIZE(test_values); i++) {
u64 a = test_values[i].a;
u64 b = test_values[i].b;
- u64 c = test_values[i].c;
+ u64 d = test_values[i].d;
u64 expected_result = test_values[i].result;
- u64 result = mul_u64_u64_div_u64(a, b, c);
+ u64 result, result_up;
+
+ switch (fn_no) {
+ default:
+ result = mul_u64_u64_div_u64(a, b, d);
+ result_up = mul_u64_u64_div_u64_roundup(a, b, d);
+ break;
+ case 1:
+ result = test_mul_u64_add_u64_div_u64(a, b, 0, d);
+ result_up = test_mul_u64_add_u64_div_u64(a, b, d - 1, d);
+ break;
+#ifdef TEST_32BIT_DIV
+ case 2:
+ result = test_mul_u64_add_u64_div_u64_32bit(a, b, 0, d);
+ result_up = test_mul_u64_add_u64_div_u64_32bit(a, b, d - 1, d);
+ break;
+#endif
+ }
+
+ tests += 2;
if (result != expected_result) {
- pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c);
+ pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, d);
pr_err("ERROR: expected result: %016llx\n", expected_result);
pr_err("ERROR: obtained result: %016llx\n", result);
+ errors++;
+ }
+ expected_result += test_values[i].round_up;
+ if (result_up != expected_result) {
+ pr_err("ERROR: 0x%016llx * 0x%016llx +/ 0x%016llx\n", a, b, d);
+ pr_err("ERROR: expected result: %016llx\n", expected_result);
+ pr_err("ERROR: obtained result: %016llx\n", result_up);
+ errors++;
}
}
- pr_info("Completed mul_u64_u64_div_u64() test\n");
+ pr_info("Completed %s() test, %d tests, %d errors, %llu ns\n",
+ fn_name, tests, errors, ktime_get_ns() - start_time);
+ return errors;
+}
+
+static int __init test_init(void)
+{
+ pr_info("Starting mul_u64_u64_div_u64() test\n");
+ if (test_run(0, "mul_u64_u64_div_u64"))
+ return -EINVAL;
+ if (test_run(1, "test_mul_u64_u64_div_u64"))
+ return -EINVAL;
+#ifdef TEST_32BIT_DIV
+ if (test_run(2, "test_mul_u64_u64_div_u64_32bit"))
+ return -EINVAL;
+#endif
return 0;
}
@@ -91,6 +152,36 @@ static void __exit test_exit(void)
{
}
+/* Compile the generic mul_u64_add_u64_div_u64() code */
+#undef __div64_32
+#define __div64_32 __div64_32
+#define div_s64_rem div_s64_rem
+#define div64_u64_rem div64_u64_rem
+#define div64_u64 div64_u64
+#define div64_s64 div64_s64
+#define iter_div_u64_rem iter_div_u64_rem
+
+#undef mul_u64_add_u64_div_u64
+#define mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64
+#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64
+
+#include "div64.c"
+
+#ifdef TEST_32BIT_DIV
+/* Recompile the generic code for 32bit long */
+#undef test_mul_u64_add_u64_div_u64
+#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64_32bit
+#undef BITS_PER_ITER
+#define BITS_PER_ITER 16
+
+#define mul_u64_u64_add_u64 mul_u64_u64_add_u64_32bit
+#undef mul_u64_long_add_u64
+#undef add_u64_long
+#undef mul_add
+
+#include "div64.c"
+#endif
+
module_init(test_init);
module_exit(test_exit);
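
[Editor's note: the identity the test exercises when it passes c = d - 1 is
worth spelling out. A small sketch, assuming the round-up helper is defined
in terms of the add-based primitive, as the table's round_up column implies;
the helper name below is hypothetical.]

/* Rounding a*b/d up is dividing after adding d - 1 to the product: */
static inline u64 roundup_via_add(u64 a, u64 b, u64 d)
{
	/* Should match mul_u64_u64_div_u64_roundup(a, b, d) above. */
	return mul_u64_add_u64_div_u64(a, b, d - 1, d);
}
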
diff --git a/lib/plist.c b/lib/plist.c
index 330febb4bd7d..ba677c31e8f3 100644
--- a/lib/plist.c
+++ b/