aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 14:25:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 14:25:26 -0700
commitb96a3e9142fdf346b05b20e867b4f0dfca119e96 (patch)
treeb338a8f8930abc24888fc3871c6627f6ad46e23b /tools
parent651a00bc56403161351090a9d7ddbd7095975324 (diff)
parent52ae298e3e5c9be5bb95e1c6d9199e5210f2a156 (diff)
Merge tag 'mm-stable-2023-08-28-18-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Some swap cleanups from Ma Wupeng ("fix WARN_ON in add_to_avail_list") - Peter Xu has a series (mm/gup: Unify hugetlb, speed up thp") which reduces the special-case code for handling hugetlb pages in GUP. It also speeds up GUP handling of transparent hugepages. - Peng Zhang provides some maple tree speedups ("Optimize the fast path of mas_store()"). - Sergey Senozhatsky has improved te performance of zsmalloc during compaction (zsmalloc: small compaction improvements"). - Domenico Cerasuolo has developed additional selftest code for zswap ("selftests: cgroup: add zswap test program"). - xu xin has doe some work on KSM's handling of zero pages. These changes are mainly to enable the user to better understand the effectiveness of KSM's treatment of zero pages ("ksm: support tracking KSM-placed zero-pages"). - Jeff Xu has fixes the behaviour of memfd's MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED sysctl ("mm/memfd: fix sysctl MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED"). - David Howells has fixed an fscache optimization ("mm, netfs, fscache: Stop read optimisation when folio removed from pagecache"). - Axel Rasmussen has given userfaultfd the ability to simulate memory poisoning ("add UFFDIO_POISON to simulate memory poisoning with UFFD"). - Miaohe Lin has contributed some routine maintenance work on the memory-failure code ("mm: memory-failure: remove unneeded PageHuge() check"). - Peng Zhang has contributed some maintenance work on the maple tree code ("Improve the validation for maple tree and some cleanup"). - Hugh Dickins has optimized the collapsing of shmem or file pages into THPs ("mm: free retracted page table by RCU"). - Jiaqi Yan has a patch series which permits us to use the healthy subpages within a hardware poisoned huge page for general purposes ("Improve hugetlbfs read on HWPOISON hugepages"). - Kemeng Shi has done some maintenance work on the pagetable-check code ("Remove unused parameters in page_table_check"). - More folioification work from Matthew Wilcox ("More filesystem folio conversions for 6.6"), ("Followup folio conversions for zswap"). And from ZhangPeng ("Convert several functions in page_io.c to use a folio"). - page_ext cleanups from Kemeng Shi ("minor cleanups for page_ext"). - Baoquan He has converted some architectures to use the GENERIC_IOREMAP ioremap()/iounmap() code ("mm: ioremap: Convert architectures to take GENERIC_IOREMAP way"). - Anshuman Khandual has optimized arm64 tlb shootdown ("arm64: support batched/deferred tlb shootdown during page reclamation/migration"). - Better maple tree lockdep checking from Liam Howlett ("More strict maple tree lockdep"). Liam also developed some efficiency improvements ("Reduce preallocations for maple tree"). - Cleanup and optimization to the secondary IOMMU TLB invalidation, from Alistair Popple ("Invalidate secondary IOMMU TLB on permission upgrade"). - Ryan Roberts fixes some arm64 MM selftest issues ("selftests/mm fixes for arm64"). - Kemeng Shi provides some maintenance work on the compaction code ("Two minor cleanups for compaction"). - Some reduction in mmap_lock pressure from Matthew Wilcox ("Handle most file-backed faults under the VMA lock"). - Aneesh Kumar contributes code to use the vmemmap optimization for DAX on ppc64, under some circumstances ("Add support for DAX vmemmap optimization for ppc64"). - page-ext cleanups from Kemeng Shi ("add page_ext_data to get client data in page_ext"), ("minor cleanups to page_ext header"). - Some zswap cleanups from Johannes Weiner ("mm: zswap: three cleanups"). - kmsan cleanups from ZhangPeng ("minor cleanups for kmsan"). - VMA handling cleanups from Kefeng Wang ("mm: convert to vma_is_initial_heap/stack()"). - DAMON feature work from SeongJae Park ("mm/damon/sysfs-schemes: implement DAMOS tried total bytes file"), ("Extend DAMOS filters for address ranges and DAMON monitoring targets"). - Compaction work from Kemeng Shi ("Fixes and cleanups to compaction"). - Liam Howlett has improved the maple tree node replacement code ("maple_tree: Change replacement strategy"). - ZhangPeng has a general code cleanup - use the K() macro more widely ("cleanup with helper macro K()"). - Aneesh Kumar brings memmap-on-memory to ppc64 ("Add support for memmap on memory feature on ppc64"). - pagealloc cleanups from Kemeng Shi ("Two minor cleanups for pcp list in page_alloc"), ("Two minor cleanups for get pageblock migratetype"). - Vishal Moola introduces a memory descriptor for page table tracking, "struct ptdesc" ("Split ptdesc from struct page"). - memfd selftest maintenance work from Aleksa Sarai ("memfd: cleanups for vm.memfd_noexec"). - MM include file rationalization from Hugh Dickins ("arch: include asm/cacheflush.h in asm/hugetlb.h"). - THP debug output fixes from Hugh Dickins ("mm,thp: fix sloppy text output"). - kmemleak improvements from Xiaolei Wang ("mm/kmemleak: use object_cache instead of kmemleak_initialized"). - More folio-related cleanups from Matthew Wilcox ("Remove _folio_dtor and _folio_order"). - A VMA locking scalability improvement from Suren Baghdasaryan ("Per-VMA lock support for swap and userfaults"). - pagetable handling cleanups from Matthew Wilcox ("New page table range API"). - A batch of swap/thp cleanups from David Hildenbrand ("mm/swap: stop using page->private on tail pages for THP_SWAP + cleanups"). - Cleanups and speedups to the hugetlb fault handling from Matthew Wilcox ("Change calling convention for ->huge_fault"). - Matthew Wilcox has also done some maintenance work on the MM subsystem documentation ("Improve mm documentation"). * tag 'mm-stable-2023-08-28-18-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (489 commits) maple_tree: shrink struct maple_tree maple_tree: clean up mas_wr_append() secretmem: convert page_is_secretmem() to folio_is_secretmem() nios2: fix flush_dcache_page() for usage from irq context hugetlb: add documentation for vma_kernel_pagesize() mm: add orphaned kernel-doc to the rst files. mm: fix clean_record_shared_mapping_range kernel-doc mm: fix get_mctgt_type() kernel-doc mm: fix kernel-doc warning from tlb_flush_rmaps() mm: remove enum page_entry_size mm: allow ->huge_fault() to be called without the mmap_lock held mm: move PMD_ORDER to pgtable.h mm: remove checks for pte_index memcg: remove duplication detection for mem_cgroup_uncharge_swap mm/huge_memory: work on folio->swap instead of page->private when splitting folio mm/swap: inline folio_set_swap_entry() and folio_swap_entry() mm/swap: use dedicated entry for swap in folio mm/swap: stop using page->private on tail pages for THP_SWAP selftests/mm: fix WARNING comparing pointer to 0 selftests: cgroup: fix test_kmem_memcg_deletion kernel mem check ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/radix-tree/maple.c134
-rw-r--r--tools/testing/selftests/bpf/progs/get_branch_snapshot.c4
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c21
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c286
-rw-r--r--tools/testing/selftests/damon/sysfs.sh6
-rw-r--r--tools/testing/selftests/kselftest.h9
-rw-r--r--tools/testing/selftests/kselftest/runner.sh7
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c329
-rw-r--r--tools/testing/selftests/mm/.gitignore1
-rw-r--r--tools/testing/selftests/mm/Makefile81
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c322
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c200
-rw-r--r--tools/testing/selftests/mm/madv_populate.c26
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/migration.c12
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c1
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh80
-rw-r--r--tools/testing/selftests/mm/settings2
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c4
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c12
-rw-r--r--tools/testing/selftests/mm/uffd-common.c5
-rw-r--r--tools/testing/selftests/mm/uffd-common.h3
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c32
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c117
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c2
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c4
28 files changed, 1513 insertions, 192 deletions
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 75ea2081a317..e5da1cad70ba 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -45,6 +45,13 @@ struct rcu_test_struct2 {
unsigned long last[RCU_RANGE_COUNT];
};
+struct rcu_test_struct3 {
+ struct maple_tree *mt;
+ unsigned long index;
+ unsigned long last;
+ bool stop;
+};
+
struct rcu_reader_struct {
unsigned int id;
int mod;
@@ -34954,6 +34961,70 @@ void run_check_rcu(struct maple_tree *mt, struct rcu_test_struct *vals)
MT_BUG_ON(mt, !vals->seen_entry2);
}
+static void *rcu_slot_store_reader(void *ptr)
+{
+ struct rcu_test_struct3 *test = ptr;
+ MA_STATE(mas, test->mt, test->index, test->index);
+
+ rcu_register_thread();
+
+ rcu_read_lock();
+ while (!test->stop) {
+ mas_walk(&mas);
+ /* The length of growth to both sides must be equal. */
+ RCU_MT_BUG_ON(test, (test->index - mas.index) !=
+ (mas.last - test->last));
+ }
+ rcu_read_unlock();
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static noinline void run_check_rcu_slot_store(struct maple_tree *mt)
+{
+ pthread_t readers[20];
+ int range_cnt = 200, i, limit = 10000;
+ unsigned long len = ULONG_MAX / range_cnt, start, end;
+ struct rcu_test_struct3 test = {.stop = false, .mt = mt};
+
+ start = range_cnt / 2 * len;
+ end = start + len - 1;
+ test.index = start;
+ test.last = end;
+
+ for (i = 0; i < range_cnt; i++) {
+ mtree_store_range(mt, i * len, i * len + len - 1,
+ xa_mk_value(i * 100), GFP_KERNEL);
+ }
+
+ mt_set_in_rcu(mt);
+ MT_BUG_ON(mt, !mt_in_rcu(mt));
+
+ for (i = 0; i < ARRAY_SIZE(readers); i++) {
+ if (pthread_create(&readers[i], NULL, rcu_slot_store_reader,
+ &test)) {
+ perror("creating reader thread");
+ exit(1);
+ }
+ }
+
+ usleep(5);
+
+ while (limit--) {
+ /* Step by step, expand the most middle range to both sides. */
+ mtree_store_range(mt, --start, ++end, xa_mk_value(100),
+ GFP_KERNEL);
+ }
+
+ test.stop = true;
+
+ while (i--)
+ pthread_join(readers[i], NULL);
+
+ mt_validate(mt);
+}
+
static noinline
void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals)
{
@@ -35206,6 +35277,10 @@ static noinline void __init check_rcu_threaded(struct maple_tree *mt)
run_check_rcu(mt, &vals);
mtree_destroy(mt);
+ /* Check expanding range in RCU mode */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ run_check_rcu_slot_store(mt);
+ mtree_destroy(mt);
/* Forward writer for rcu stress */
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
@@ -35383,7 +35458,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
for (i = 0; i <= max; i++)
mtree_test_store_range(mt, i * 10, i * 10 + 5, &i);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Spanning store */
+ mas_set_range(&mas, 470, 500);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
@@ -35392,105 +35469,108 @@ static noinline void __init check_prealloc(struct maple_tree *mt)
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mas_push_node(&mas, mn);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Slot store does not need allocations */
+ mas_set_range(&mas, 6, 9);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
- height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 0);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+
+ mas_set_range(&mas, 6, 10);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1);
mas_store_prealloc(&mas, ptr);
+ MT_BUG_ON(mt, mas_allocated(&mas) != 0);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Split */
+ mas_set_range(&mas, 54, 54);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
- MT_BUG_ON(mt, allocated == 0);
- MT_BUG_ON(mt, allocated != 1 + height * 3);
+ MT_BUG_ON(mt, allocated != 1 + height * 2);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mt_set_non_kernel(1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0);
+ /* Spanning store */
+ mas_set_range(&mas, 1, 100);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated != 0);
mas_destroy(&mas);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
+ /* Spanning store */
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated == 0);
MT_BUG_ON(mt, allocated != 1 + height * 3);
mas_store_prealloc(&mas, ptr);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
+ mas_set_range(&mas, 0, 200);
mt_set_non_kernel(1);
- MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0);
+ MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0);
allocated = mas_allocated(&mas);
height = mas_mt_height(&mas);
MT_BUG_ON(mt, allocated != 0);
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
index a1b139888048..511ac634eef0 100644
--- a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -15,7 +15,7 @@ long total_entries = 0;
#define ENTRY_CNT 32
struct perf_branch_entry entries[ENTRY_CNT] = {};
-static inline bool in_range(__u64 val)
+static inline bool gbs_in_range(__u64 val)
{
return (val >= address_low) && (val < address_high);
}
@@ -31,7 +31,7 @@ int BPF_PROG(test1, int n, int ret)
for (i = 0; i < ENTRY_CNT; i++) {
if (i >= total_entries)
break;
- if (in_range(entries[i].from) && in_range(entries[i].to))
+ if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to))
test1_hits++;
else if (!test1_hits)
wasted_entries++;
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index c4a57e69f749..4d556df4f77b 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -5,4 +5,5 @@ test_freezer
test_kmem
test_kill
test_cpu
+test_zswap
wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 3d263747d2ad..27dbdd7bb4bb 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
TEST_GEN_PROGS += test_kill
TEST_GEN_PROGS += test_cpu
+TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
@@ -23,3 +24,4 @@ $(OUTPUT)/test_core: cgroup_util.c
$(OUTPUT)/test_freezer: cgroup_util.c
$(OUTPUT)/test_kill: cgroup_util.c
$(OUTPUT)/test_cpu: cgroup_util.c
+$(OUTPUT)/test_zswap: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index ed2e50bb1e76..c82f974b85c9 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -162,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent,
* allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
* threads. Then it checks the sanity of numbers on the parent level:
* the total size of the cgroups should be roughly equal to
- * anon + file + slab + kernel_stack.
+ * anon + file + kernel + sock.
*/
static int test_kmem_memcg_deletion(const char *root)
{
- long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum;
+ long current, anon, file, kernel, sock, sum;
int ret = KSFT_FAIL;
char *parent;
@@ -184,29 +184,22 @@ static int test_kmem_memcg_deletion(const char *root)
goto cleanup;
current = cg_read_long(parent, "memory.current");
- slab = cg_read_key_long(parent, "memory.stat", "slab ");
anon = cg_read_key_long(parent, "memory.stat", "anon ");
file = cg_read_key_long(parent, "memory.stat", "file ");
- kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
- pagetables = cg_read_key_long(parent, "memory.stat", "pagetables ");
- percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+ kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
sock = cg_read_key_long(parent, "memory.stat", "sock ");
- if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
- kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0)
+ if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
goto cleanup;
- sum = slab + anon + file + kernel_stack + pagetables + percpu + sock;
+ sum = anon + file + kernel + sock;
if (abs(sum - current) < MAX_VMSTAT_ERROR) {
ret = KSFT_PASS;
} else {
printf("memory.current = %ld\n", current);
- printf("slab + anon + file + kernel_stack = %ld\n", sum);
- printf("slab = %ld\n", slab);
+ printf("anon + file + kernel + sock = %ld\n", sum);
printf("anon = %ld\n", anon);
printf("file = %ld\n", file);
- printf("kernel_stack = %ld\n", kernel_stack);
- printf("pagetables = %ld\n", pagetables);
- printf("percpu = %ld\n", percpu);
+ printf("kernel = %ld\n", kernel);
printf("sock = %ld\n", sock);
}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
new file mode 100644
index 000000000000..49def87a909b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/sysinfo.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+static int read_int(const char *path, size_t *value)
+{
+ FILE *file;
+ int ret = 0;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -1;
+ if (fscanf(file, "%ld", value) != 1)
+ ret = -1;
+ fclose(file);
+ return ret;
+}
+
+static int set_min_free_kb(size_t value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen("/proc/sys/vm/min_free_kbytes", "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int read_min_free_kb(size_t *value)
+{
+ return read_int("/proc/sys/vm/min_free_kbytes", value);
+}
+
+static int get_zswap_stored_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/stored_pages", value);
+}
+
+static int get_zswap_written_back_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/written_back_pages", value);
+}
+
+static int allocate_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+ free(mem);
+ return 0;
+}
+
+/*
+ * When trying to store a memcg page in zswap, if the memcg hits its memory
+ * limit in zswap, writeback should not be triggered.
+ *
+ * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
+ * not zswap"). Needs to be revised when a per memcg writeback mechanism is
+ * implemented.
+ */
+static int test_no_invasive_cgroup_shrink(const char *root)
+{
+ size_t written_back_before, written_back_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ /* Set up */
+ test_group = cg_name(root, "no_shrink_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "1M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "10K"))
+ goto out;
+ if (get_zswap_written_back_pages(&written_back_before))
+ goto out;
+
+ /* Allocate 10x memory.max to push memory into zswap */
+ if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
+ goto out;
+
+ /* Verify that no writeback happened because of the memcg allocation */
+ if (get_zswap_written_back_pages(&written_back_after))
+ goto out;
+ if (written_back_after == written_back_before)
+ ret = KSFT_PASS;
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+struct no_kmem_bypass_child_args {
+ size_t target_alloc_bytes;
+ size_t child_allocated;
+};
+
+static int no_kmem_bypass_child(const char *cgroup, void *arg)
+{
+ struct no_kmem_bypass_child_args *values = arg;
+ void *allocation;
+
+ allocation = malloc(values->target_alloc_bytes);
+ if (!allocation) {
+ values->child_allocated = true;
+ return -1;
+ }
+ for (long i = 0; i < values->target_alloc_bytes; i += 4095)
+ ((char *)allocation)[i] = 'a';
+ values->child_allocated = true;
+ pause();
+ free(allocation);
+ return 0;
+}
+
+/*
+ * When pages owned by a memcg are pushed to zswap by kswapd, they should be
+ * charged to that cgroup. This wasn't the case before commit
+ * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
+ *
+ * The test first allocates memory in a memcg, then raises min_free_kbytes to
+ * a very high value so that the allocation falls below low wm, then makes
+ * another allocation to trigger kswapd that should push the memcg-owned pages
+ * to zswap and verifies that the zswap pages are correctly charged.
+ *
+ * To be run on a VM with at most 4G of memory.
+ */
+static int test_no_kmem_bypass(const char *root)
+{
+ size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
+ struct no_kmem_bypass_child_args *values;
+ size_t trigger_allocation_size;
+ int wait_child_iteration = 0;
+ long stored_pages_threshold;
+ struct sysinfo sys_info;
+ int ret = KSFT_FAIL;
+ int child_status;
+ char *test_group;
+ pid_t child_pid;
+
+ /* Read sys info and compute test values accordingly */
+ if (sysinfo(&sys_info) != 0)
+ return KSFT_FAIL;
+ if (sys_info.totalram > 5000000000)
+ return KSFT_SKIP;
+ values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
+ PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (values == MAP_FAILED)
+ return KSFT_FAIL;
+ if (read_min_free_kb(&min_free_kb_original))
+ return KSFT_FAIL;
+ min_free_kb_high = sys_info.totalram / 2000;
+ min_free_kb_low = sys_info.totalram / 500000;
+ values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
+ sys_info.totalram * 5 / 100;
+ stored_pages_threshold = sys_info.totalram / 5 / 4096;
+ trigger_allocation_size = sys_info.totalram / 20;
+
+ /* Set up test memcg */
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ goto out;
+ test_group = cg_name(root, "kmem_bypass_test");
+ if (!test_group)
+ goto out;
+
+ /* Spawn memcg child and wait for it to allocate */
+ set_min_free_kb(min_free_kb_low);
+ if (cg_create(test_group))
+ goto out;
+ values->child_allocated = false;
+ child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
+ if (child_pid < 0)
+ goto out;
+ while (!values->child_allocated && wait_child_iteration++ < 10000)
+ usleep(1000);
+
+ /* Try to wakeup kswapd and let it push child memory to zswap */
+ set_min_free_kb(min_free_kb_high);
+ for (int i = 0; i < 20; i++) {
+ size_t stored_pages;
+ char *trigger_allocation = malloc(trigger_allocation_size);
+
+ if (!trigger_allocation)
+ break;
+ for (int i = 0; i < trigger_allocation_size; i += 4095)
+ trigger_allocation[i] = 'b';
+ usleep(100000);
+ free(trigger_allocation);
+ if (get_zswap_stored_pages(&stored_pages))
+ break;
+ if (stored_pages < 0)
+ break;
+ /* If memory was pushed to zswap, verify it belongs to memcg */
+ if (stored_pages > stored_pages_threshold) {
+ int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
+ int delta = stored_pages * 4096 - zswapped;
+ int result_ok = delta < stored_pages * 4096 / 4;
+
+ ret = result_ok ? KSFT_PASS : KSFT_FAIL;
+ break;
+ }
+ }
+
+ kill(child_pid, SIGTERM);
+ waitpid(child_pid, &child_status, 0);
+out:
+ set_min_free_kb(min_free_kb_original);
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct zswap_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_no_kmem_bypass),
+ T(test_no_invasive_cgroup_shrink),
+};
+#undef T
+
+static bool zswap_configured(void)
+{
+ return access("/sys/module/zswap", F_OK) == 0;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (!zswap_configured())
+ ksft_exit_skip("zswap isn't configured\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index bcd4734ca094..60a9a305aef0 100644
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -84,6 +84,7 @@ test_tried_regions()
{
tried_regions_dir=$1
ensure_dir "$tried_regions_dir" "exist"
+ ensure_file "$tried_regions_dir/total_bytes" "exist" "400"
}
test_stats()
@@ -102,9 +103,14 @@ test_filter()
ensure_file "$filter_dir/type" "exist" "600"
ensure_write_succ "$filter_dir/type" "anon" "valid input"
ensure_write_succ "$filter_dir/type" "memcg" "valid input"
+ ensure_write_succ "$filter_dir/type" "addr" "valid input"
+ ensure_write_succ "$filter_dir/type" "target" "valid input"
ensure_write_fail "$filter_dir/type" "foo" "invalid input"
ensure_file "$filter_dir/matching" "exist" "600"
ensure_file "$filter_dir/memcg_path" "exist" "600"
+ ensure_file "$filter_dir/addr_start" "exist" "600"
+ ensure_file "$filter_dir/addr_end" "exist" "600"
+ ensure_file "$filter_dir/damon_target_idx" "exist" "600"
}
test_filters()
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 829be379545a..529d29a35900 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -113,6 +113,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
+ /*
+ * Force line buf