author    Linus Torvalds <torvalds@linux-foundation.org>  2025-12-05 13:52:43 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-12-05 13:52:43 -0800
commit    7203ca412fc8e8a0588e9adc0f777d3163f8dff3 (patch)
tree      7cbdcdb0bc0533f0133d472f95629099c123c3f9 /include
parent    ac20755937e037e586b1ca18a6717d31b1cbce93 (diff)
parent    faf3c923523e5c8fc3baaa413d62e913774ae52f (diff)
Merge tag 'mm-stable-2025-12-03-21-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - "__vmalloc()/kvmalloc() and no-block support" (Uladzislau Rezki)

   Rework the vmalloc() code to support non-blocking allocations
   (GFP_ATOMIC, GFP_NOWAIT)

 - "ksm: fix exec/fork inheritance" (xu xin)

   Fix a rare case where the KSM MMF_VM_MERGE_ANY prctl state is not
   inherited across fork/exec

 - "mm/zswap: misc cleanup of code and documentations" (SeongJae Park)

   Some light maintenance work on the zswap code

 - "mm/page_owner: add debugfs files 'show_handles' and
   'show_stacks_handles'" (Mauricio Faria de Oliveira)

   Enhance the /sys/kernel/debug/page_owner debug feature by adding
   unique identifiers to differentiate the various stack traces so that
   userspace monitoring tools can better match stack traces over time

 - "mm/page_alloc: pcp->batch cleanups" (Joshua Hahn)

   Minor alterations to the page allocator's per-cpu-pages feature

 - "Improve UFFDIO_MOVE scalability by removing anon_vma lock" (Lokesh
   Gidra)

   Address a scalability issue in userfaultfd's UFFDIO_MOVE operation

 - "kasan: cleanups for kasan_enabled() checks" (Sabyrzhan Tasbolatov)

 - "drivers/base/node: fold node register and unregister functions"
   (Donet Tom)

   Clean up the NUMA node handling code a little

 - "mm: some optimizations for prot numa" (Kefeng Wang)

   Cleanups and small optimizations to the NUMA allocation hinting code

 - "mm/page_alloc: Batch callers of free_pcppages_bulk" (Joshua Hahn)

   Address long lock hold times at boot on large machines. These were
   causing (harmless) softlockup warnings

 - "optimize the logic for handling dirty file folios during reclaim"
   (Baolin Wang)

   Remove some now-unnecessary work from page reclaim

 - "mm/damon: allow DAMOS auto-tuned for per-memcg per-node memory
   usage" (SeongJae Park)

   Enhance the DAMOS auto-tuning feature

 - "mm/damon: fixes for address alignment issues in DAMON_LRU_SORT and
   DAMON_RECLAIM" (Quanmin Yan)

   Fix DAMON_LRU_SORT and DAMON_RECLAIM with certain userspace
   configurations

 - "expand mmap_prepare functionality, port more users" (Lorenzo
   Stoakes)

   Enhance the new(ish) file_operations.mmap_prepare() method and port
   additional callsites from the old ->mmap() over to ->mmap_prepare()

 - "Fix stale IOTLB entries for kernel address space" (Lu Baolu)

   Fix a bug (and possible security issue on non-x86) in the IOMMU
   code. In some situations the IOMMU could be left hanging onto a
   stale kernel pagetable entry

 - "mm/huge_memory: cleanup __split_unmapped_folio()" (Wei Yang)

   Clean up and optimize the folio splitting code

 - "mm, swap: misc cleanup and bugfix" (Kairui Song)

   Some cleanups and a minor fix in the swap discard code

 - "mm/damon: misc documentation fixups" (SeongJae Park)

 - "mm/damon: support pin-point targets removal" (SeongJae Park)

   Permit userspace to remove a specific monitoring target in the
   middle of the current targets list

 - "mm: MISC follow-up patches for linux/pgalloc.h" (Harry Yoo)

   A couple of cleanups related to mm header file inclusion

 - "mm/swapfile.c: select swap devices of default priority round robin"
   (Baoquan He)

   Improve the selection of swap devices for NUMA machines

 - "mm: Convert memory block states (MEM_*) macros to enums" (Israel
   Batista)

   Change the memory block labels from macros to enums so they will
   appear in kernel debug info

 - "ksm: perform a range-walk to jump over holes in break_ksm" (Pedro
   Demarchi Gomes)

   Address an inefficiency when KSM unmerges an address range

 - "mm/damon/tests: fix memory bugs in kunit tests" (SeongJae Park)

   Fix leaks and unhandled malloc() failures in DAMON userspace unit
   tests

 - "some cleanups for pageout()" (Baolin Wang)

   Clean up a couple of minor things in the page scanner's
   writeback-for-eviction code

 - "mm/hugetlb: refactor sysfs/sysctl interfaces" (Hui Zhu)

   Move hugetlb's sysfs/sysctl handling code into a new file

 - "introduce VM_MAYBE_GUARD and make it sticky" (Lorenzo Stoakes)

   Make the VMA guard regions available in /proc/pid/smaps and improve
   the mergeability of guarded VMAs

 - "mm: perform guard region install/remove under VMA lock" (Lorenzo
   Stoakes)

   Reduce mmap lock contention for callers performing VMA guard region
   operations

 - "vma_start_write_killable" (Matthew Wilcox)

   Start work on permitting applications to be killed when they are
   waiting on a read_lock on the VMA lock

 - "mm/damon/tests: add more tests for online parameters commit"
   (SeongJae Park)

   Add additional userspace testing of DAMON's "commit" feature

 - "mm/damon: misc cleanups" (SeongJae Park)

 - "make VM_SOFTDIRTY a sticky VMA flag" (Lorenzo Stoakes)

   Address the possible loss of a VMA's VM_SOFTDIRTY flag when that VMA
   is merged with another

 - "mm: support device-private THP" (Balbir Singh)

   Introduce support for Transparent Huge Page (THP) migration in zone
   device-private memory

 - "Optimize folio split in memory failure" (Zi Yan)

 - "mm/huge_memory: Define split_type and consolidate split support
   checks" (Wei Yang)

   Some more cleanups in the folio splitting code

 - "mm: remove is_swap_[pte, pmd]() + non-swap entries, introduce leaf
   entries" (Lorenzo Stoakes)

   Clean up our handling of pagetable leaf entries by introducing the
   concept of 'software leaf entries', of type softleaf_t

 - "reparent the THP split queue" (Muchun Song)

   Reparent the THP split queue to its parent memcg. This is in
   preparation for addressing the long-standing "dying memcg" problem,
   wherein dead memcgs linger for too long, consuming memory resources

 - "unify PMD scan results and remove redundant cleanup" (Wei Yang)

   A little cleanup in the hugepage collapse code

 - "zram: introduce writeback bio batching" (Sergey Senozhatsky)

   Improve zram writeback efficiency by introducing batched bio
   writeback support

 - "memcg: cleanup the memcg stats interfaces" (Shakeel Butt)

   Clean up our handling of the interrupt safety of some memcg stats

 - "make vmalloc gfp flags usage more apparent" (Vishal Moola)

   Clean up vmalloc's handling of incoming GFP flags

 - "mm: Add soft-dirty and uffd-wp support for RISC-V" (Chunyan Zhang)

   Teach soft-dirty and userfaultfd write-protect tracking to use
   RISC-V's Svrsw60t59b extension

 - "mm: swap: small fixes and comment cleanups" (Youngjun Park)

   Fix a small bug and clean up some of the swap code

 - "initial work on making VMA flags a bitmap" (Lorenzo Stoakes)

   Start work on converting the vma struct's flags to a bitmap, so we
   stop running out of them, especially on 32-bit

 - "mm/swapfile: fix and cleanup swap list iterations" (Youngjun Park)

   Address a possible bug in the swap discard code and clean things up
   a little

[ This merge also reverts commit ebb9aeb980e5 ("vfio/nvgrace-gpu:
  register device memory for poison handling") because it looks broken
  to me, I've asked for clarification  - Linus ]

* tag 'mm-stable-2025-12-03-21-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (321 commits)
  mm: fix vma_start_write_killable() signal handling
  mm/swapfile: use plist_for_each_entry in __folio_throttle_swaprate
  mm/swapfile: fix list iteration when next node is removed during discard
  fs/proc/task_mmu.c: fix make_uffd_wp_huge_pte() huge pte handling
  mm/kfence: add reboot notifier to disable KFENCE on shutdown
  memcg: remove inc/dec_lruvec_kmem_state helpers
  selftests/mm/uffd: initialize char variable to Null
  mm: fix DEBUG_RODATA_TEST indentation in Kconfig
  mm: introduce VMA flags bitmap type
  tools/testing/vma: eliminate dependency on vma->__vm_flags
  mm: simplify and rename mm flags function for clarity
  mm: declare VMA flags by bit
  zram: fix a spelling mistake
  mm/page_alloc: optimize lowmem_reserve max lookup using its semantic monotonicity
  mm/vmscan: skip increasing kswapd_failures when reclaim was boosted
  pagemap: update BUDDY flag documentation
  mm: swap: remove scan_swap_map_slots() references from comments
  mm: swap: change swap_alloc_slow() to void
  mm, swap: remove redundant comment for read_swap_cache_async
  mm, swap: use SWP_SOLIDSTATE to determine if swap is rotational
  ...
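As a rough sketch of what the no-block vmalloc support above enables (illustrative only, not code from this merge; the helper name and fallback policy are invented), a context that cannot sleep can now attempt a virtually contiguous allocation directly and simply cope with failure:

#include <linux/vmalloc.h>
#include <linux/gfp.h>
#include <linux/printk.h>

/*
 * Illustrative sketch only: with non-blocking vmalloc support, an atomic
 * context may request a buffer with GFP_NOWAIT (or GFP_ATOMIC) instead of
 * relying on a pre-allocated pool. Helper name and policy are hypothetical.
 */
static void *grab_scratch_nowait(unsigned long size)
{
	void *buf = __vmalloc(size, GFP_NOWAIT | __GFP_NOWARN);

	if (!buf)
		pr_debug("scratch buffer unavailable, deferring work\n");
	return buf;
}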
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/hugetlb.h             8
-rw-r--r--  include/asm-generic/pgalloc.h            24
-rw-r--r--  include/asm-generic/pgtable_uffd.h       17
-rw-r--r--  include/linux/damon.h                    39
-rw-r--r--  include/linux/fs.h                        6
-rw-r--r--  include/linux/gfp.h                       2
-rw-r--r--  include/linux/huge_mm.h                 112
-rw-r--r--  include/linux/hugetlb.h                  13
-rw-r--r--  include/linux/hugetlb_inline.h           15
-rw-r--r--  include/linux/iommu.h                     4
-rw-r--r--  include/linux/kasan.h                    20
-rw-r--r--  include/linux/kmsan.h                     6
-rw-r--r--  include/linux/ksm.h                       4
-rw-r--r--  include/linux/leafops.h                 619
-rw-r--r--  include/linux/memcontrol.h               89
-rw-r--r--  include/linux/memory-failure.h           17
-rw-r--r--  include/linux/memory.h                   24
-rw-r--r--  include/linux/memremap.h                 57
-rw-r--r--  include/linux/migrate.h                   4
-rw-r--r--  include/linux/mm.h                      681
-rw-r--r--  include/linux/mm_inline.h                16
-rw-r--r--  include/linux/mm_types.h                156
-rw-r--r--  include/linux/mmap_lock.h                37
-rw-r--r--  include/linux/mmzone.h                    4
-rw-r--r--  include/linux/node.h                     10
-rw-r--r--  include/linux/pgtable.h                  12
-rw-r--r--  include/linux/sched/mm.h                 12
-rw-r--r--  include/linux/shmem_fs.h                  9
-rw-r--r--  include/linux/swap.h                     15
-rw-r--r--  include/linux/swapops.h                 241
-rw-r--r--  include/linux/userfaultfd_k.h            96
-rw-r--r--  include/linux/vmalloc.h                   8
-rw-r--r--  include/linux/vmstat.h                   48
-rw-r--r--  include/net/sock.h                        6
-rw-r--r--  include/ras/ras_event.h                  86
-rw-r--r--  include/trace/events/huge_memory.h        3
-rw-r--r--  include/trace/events/memory-failure.h    98
-rw-r--r--  include/trace/events/mmflags.h            1
38 files changed, 1810 insertions, 809 deletions
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index dcb8727f2b82..e1a2e1b7c8e7 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -97,14 +97,6 @@ static inline int huge_pte_none(pte_t pte)
}
#endif
-/* Please refer to comments above pte_none_mostly() for the usage */
-#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY
-static inline int huge_pte_none_mostly(pte_t pte)
-{
- return huge_pte_none(pte) || is_pte_marker(pte);
-}
-#endif
-
#ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 3c8ec3bfea44..57137d3ac159 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -18,8 +18,7 @@
*/
static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm)
{
- struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL &
- ~__GFP_HIGHMEM, 0);
+ struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL, 0);
if (!ptdesc)
return NULL;
@@ -28,6 +27,8 @@ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm)
return NULL;
}
+ ptdesc_set_kernel(ptdesc);
+
return ptdesc_address(ptdesc);
}
#define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__))
@@ -146,6 +147,10 @@ static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long ad
pagetable_free(ptdesc);
return NULL;
}
+
+ if (mm == &init_mm)
+ ptdesc_set_kernel(ptdesc);
+
return ptdesc_address(ptdesc);
}
#define pmd_alloc_one(...) alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__))
@@ -172,13 +177,16 @@ static inline pud_t *__pud_alloc_one_noprof(struct mm_struct *mm, unsigned long
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
- gfp &= ~__GFP_HIGHMEM;
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
pagetable_pud_ctor(ptdesc);
+
+ if (mm == &init_mm)
+ ptdesc_set_kernel(ptdesc);
+
return ptdesc_address(ptdesc);
}
#define __pud_alloc_one(...) alloc_hooks(__pud_alloc_one_noprof(__VA_ARGS__))
@@ -226,13 +234,16 @@ static inline p4d_t *__p4d_alloc_one_noprof(struct mm_struct *mm, unsigned long
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
- gfp &= ~__GFP_HIGHMEM;
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
pagetable_p4d_ctor(ptdesc);
+
+ if (mm == &init_mm)
+ ptdesc_set_kernel(ptdesc);
+
return ptdesc_address(ptdesc);
}
#define __p4d_alloc_one(...) alloc_hooks(__p4d_alloc_one_noprof(__VA_ARGS__))
@@ -270,13 +281,16 @@ static inline pgd_t *__pgd_alloc_noprof(struct mm_struct *mm, unsigned int order
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
- gfp &= ~__GFP_HIGHMEM;
ptdesc = pagetable_alloc_noprof(gfp, order);
if (!ptdesc)
return NULL;
pagetable_pgd_ctor(ptdesc);
+
+ if (mm == &init_mm)
+ ptdesc_set_kernel(ptdesc);
+
return ptdesc_address(ptdesc);
}
#define __pgd_alloc(...) alloc_hooks(__pgd_alloc_noprof(__VA_ARGS__))
diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h
index 828966d4c281..0d85791efdf7 100644
--- a/include/asm-generic/pgtable_uffd.h
+++ b/include/asm-generic/pgtable_uffd.h
@@ -1,6 +1,23 @@
#ifndef _ASM_GENERIC_PGTABLE_UFFD_H
#define _ASM_GENERIC_PGTABLE_UFFD_H
+/*
+ * Some platforms can customize the uffd-wp bit, making it unavailable
+ * even if the architecture provides the resource.
+ * Adding this API allows architectures to add their own checks for the
+ * devices on which the kernel is running.
+ * Note: When overriding it, please make sure the
+ * CONFIG_HAVE_ARCH_USERFAULTFD_WP is part of this macro.
+ */
+#ifndef pgtable_supports_uffd_wp
+#define pgtable_supports_uffd_wp() IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP)
+#endif
+
+static inline bool uffd_supports_wp_marker(void)
+{
+ return pgtable_supports_uffd_wp() && IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP);
+}
+
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static __always_inline int pte_uffd_wp(pte_t pte)
{
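For orientation, the override hook introduced above could be used by an architecture roughly as follows. This is a hedged sketch, not code from this series; arch_has_uffd_wp_feature() is an invented placeholder for whatever runtime check a platform needs:

/*
 * Hypothetical arch-specific override (sketch only): keep the Kconfig
 * check, as the generic comment above asks, and additionally gate
 * uffd-wp on a runtime check. arch_has_uffd_wp_feature() is invented.
 */
#define pgtable_supports_uffd_wp()				\
	(IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) &&		\
	 arch_has_uffd_wp_feature())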
diff --git a/include/linux/damon.h b/include/linux/damon.h
index cae8c613c5fc..3813373a9200 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -91,17 +91,23 @@ struct damon_region {
* @nr_regions: Number of monitoring target regions of this target.
* @regions_list: Head of the monitoring target regions of this target.
* @list: List head for siblings.
+ * @obsolete: Whether the commit destination target is obsolete.
*
* Each monitoring context could have multiple targets. For example, a context
* for virtual memory address spaces could have multiple target processes. The
* @pid should be set for appropriate &struct damon_operations including the
* virtual address spaces monitoring operations.
+ *
+ * @obsolete is used only for damon_commit_targets() source targets, to specify
+ * the matching destination targets are obsolete. Read damon_commit_targets()
+ * to see how it is handled.
*/
struct damon_target {
struct pid *pid;
unsigned int nr_regions;
struct list_head regions_list;
struct list_head list;
+ bool obsolete;
};
/**
@@ -147,6 +153,8 @@ enum damos_action {
* @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us.
* @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node.
* @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node.
+ * @DAMOS_QUOTA_NODE_MEMCG_USED_BP: MemUsed ratio of a node for a cgroup.
+ * @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup.
* @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics.
*
* Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
@@ -156,6 +164,8 @@ enum damos_quota_goal_metric {
DAMOS_QUOTA_SOME_MEM_PSI_US,
DAMOS_QUOTA_NODE_MEM_USED_BP,
DAMOS_QUOTA_NODE_MEM_FREE_BP,
+ DAMOS_QUOTA_NODE_MEMCG_USED_BP,
+ DAMOS_QUOTA_NODE_MEMCG_FREE_BP,
NR_DAMOS_QUOTA_GOAL_METRICS,
};
@@ -166,6 +176,7 @@ enum damos_quota_goal_metric {
* @current_value: Current value of @metric.
* @last_psi_total: Last measured total PSI
* @nid: Node id.
+ * @memcg_id: Memcg id.
* @list: List head for siblings.
*
* Data structure for getting the current score of the quota tuning goal. The
@@ -176,6 +187,12 @@ enum damos_quota_goal_metric {
* If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually
* entered by the user, probably inside the kdamond callbacks. Otherwise,
* DAMON sets @current_value with self-measured value of @metric.
+ *
+ * If @metric is DAMOS_QUOTA_NODE_MEM_{USED,FREE}_BP, @nid represents the node
+ * id of the target node to account the used/free memory.
+ *
+ * If @metric is DAMOS_QUOTA_NODE_MEMCG_{USED,FREE}_BP, @nid and @memcg_id
+ * represent the node id and the cgroup to account the used memory for.
*/
struct damos_quota_goal {
enum damos_quota_goal_metric metric;
@@ -184,7 +201,10 @@ struct damos_quota_goal {
/* metric-dependent fields */
union {
u64 last_psi_total;
- int nid;
+ struct {
+ int nid;
+ unsigned short memcg_id;
+ };
};
struct list_head list;
};
@@ -472,7 +492,7 @@ struct damos_migrate_dests {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @migrate_dests: Destination nodes if @action is "migrate_{hot,cold}".
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
- * @filters: Additional set of &struct damos_filter for &action.
+ * @core_filters: Additional set of &struct damos_filter for &action.
* @ops_filters: ops layer handling &struct damos_filter objects list.
* @last_applied: Last @action applied ops-managing entity.
* @stat: Statistics of this scheme.
@@ -498,7 +518,7 @@ struct damos_migrate_dests {
*
* Before applying the &action to a memory region, &struct damon_operations
* implementation could check pages of the region and skip &action to respect
- * &filters
+ * &core_filters
*
* The minimum entity that @action can be applied depends on the underlying
* &struct damon_operations. Since it may not be aligned with the core layer
@@ -542,7 +562,7 @@ struct damos {
struct damos_migrate_dests migrate_dests;
};
};
- struct list_head filters;
+ struct list_head core_filters;
struct list_head ops_filters;
void *last_applied;
struct damos_stat stat;
@@ -851,11 +871,11 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
#define damos_for_each_quota_goal_safe(goal, next, quota) \
list_for_each_entry_safe(goal, next, &(quota)->goals, list)
-#define damos_for_each_filter(f, scheme) \
- list_for_each_entry(f, &(scheme)->filters, list)
+#define damos_for_each_core_filter(f, scheme) \
+ list_for_each_entry(f, &(scheme)->core_filters, list)
-#define damos_for_each_filter_safe(f, next, scheme) \
- list_for_each_entry_safe(f, next, &(scheme)->filters, list)
+#define damos_for_each_core_filter_safe(f, next, scheme) \
+ list_for_each_entry_safe(f, next, &(scheme)->core_filters, list)
#define damos_for_each_ops_filter(f, scheme) \
list_for_each_entry(f, &(scheme)->ops_filters, list)
@@ -947,7 +967,8 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control);
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control);
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
- unsigned long *start, unsigned long *end);
+ unsigned long *start, unsigned long *end,
+ unsigned long min_sz_region);
#endif /* CONFIG_DAMON */
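A minimal sketch of how the new DAMOS_QUOTA_NODE_MEMCG_USED_BP metric above could be filled in: metric, nid and memcg_id appear in the diff, target_value is assumed from the documented goal semantics, and the function name and concrete values are made up for illustration:

#include <linux/damon.h>

/*
 * Illustrative sketch only: a quota goal that auto-tunes a scheme against
 * the memory one cgroup uses on one NUMA node. 3000 basis points = 30%.
 */
static void example_memcg_node_goal(unsigned short example_memcg_id)
{
	struct damos_quota_goal goal = {
		.metric		= DAMOS_QUOTA_NODE_MEMCG_USED_BP,
		.target_value	= 3000,	/* 30% of the node's memory */
		.nid		= 1,	/* hypothetical NUMA node */
		.memcg_id	= example_memcg_id,
	};

	/* the goal would then be added to a scheme's quota goals list */
	(void)goal;
}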
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ce25feb06727..17b38b9d7f90 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2041,14 +2041,14 @@ static inline bool can_mmap_file(struct file *file)
return true;
}
-int __compat_vma_mmap_prepare(const struct file_operations *f_op,
+int __compat_vma_mmap(const struct file_operations *f_op,
struct file *file, struct vm_area_struct *vma);
-int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma);
+int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
if (file->f_op->mmap_prepare)
- return compat_vma_mmap_prepare(file, vma);
+ return compat_vma_mmap(file, vma);
return file->f_op->mmap(file, vma);
}
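As background for the ->mmap_prepare() work mentioned in the pull message, a driver-side hook might look roughly like the sketch below. This is a hedged illustration only: my_vm_ops and my_fops are invented, and the vm_area_desc field name is an assumption rather than something shown in this diff:

#include <linux/fs.h>
#include <linux/mm_types.h>

static const struct vm_operations_struct my_vm_ops = {
	/* fault handlers etc. would go here */
};

/*
 * Hedged sketch only: an ->mmap_prepare() hook runs before the VMA is
 * created, so it configures the desired state on the descriptor instead
 * of touching a vm_area_struct. The desc->vm_ops field name is assumed.
 */
static int my_mmap_prepare(struct vm_area_desc *desc)
{
	desc->vm_ops = &my_vm_ops;
	return 0;
}

static const struct file_operations my_fops = {
	.mmap_prepare	= my_mmap_prepare,
};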
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 623bee335383..b155929af5b1 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -387,7 +387,7 @@ extern void free_pages(unsigned long addr, unsigned int order);
#define free_page(addr) free_pages((addr), 0)
void page_alloc_init_cpuhp(void);
-int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
+bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 11cab07f322a..ae7f21aad0ac 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -364,20 +364,35 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
unsigned long len, unsigned long pgoff, unsigned long flags,
vm_flags_t vm_flags);
+enum split_type {
+ SPLIT_TYPE_UNIFORM,
+ SPLIT_TYPE_NON_UNIFORM,
+};
+
bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
-int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order);
+int folio_split_unmapped(struct folio *folio, unsigned int new_order);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns);
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
-/*
- * try_folio_split_to_order - try to split a @folio at @page to @new_order using
- * non uniform split.
+
+static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order)
+{
+ return __split_huge_page_to_list_to_order(page, list, new_order);
+}
+static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
+{
+ return split_huge_page_to_list_to_order(page, NULL, new_order);
+}
+
+/**
+ * try_folio_split_to_order() - try to split a @folio at @page to @new_order
+ * using non uniform split.
* @folio: folio to be split
* @page: split to @new_order at the given page
* @new_order: the target split order
@@ -387,14 +402,13 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
* folios are put back to LRU list. Use min_order_for_split() to get the lower
* bound of @new_order.
*
- * Return: 0: split is successful, otherwise split failed.
+ * Return: 0 - split is successful, otherwise split failed.
*/
static inline int try_folio_split_to_order(struct folio *folio,
struct page *page, unsigned int new_order)
{
- if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
- return split_huge_page_to_list_to_order(&folio->page, NULL,
- new_order);
+ if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false))
+ return split_huge_page_to_order(&folio->page, new_order);
return folio_split(folio, new_order, page, NULL);
}
static inline int split_huge_page(struct page *page)
@@ -402,14 +416,43 @@ static inline int split_huge_page(struct page *page)
return split_huge_page_to_list_to_order(page, NULL, 0);
}
void deferred_split_folio(struct folio *folio, bool partially_mapped);
+#ifdef CONFIG_MEMCG
+void reparent_deferred_split_queue(struct mem_cgroup *memcg);
+#endif
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze);
+/**
+ * pmd_is_huge() - Is this PMD either a huge PMD entry or a software leaf entry?
+ * @pmd: The PMD to check.
+ *
+ * A huge PMD entry is a non-empty entry which is present and marked huge or a
+ * software leaf entry. This check be performed without the appropriate locks
+ * held, in which case the condition should be rechecked after they are
+ * acquired.
+ *
+ * Returns: true if this PMD is huge, false otherwise.
+ */
+static inline bool pmd_is_huge(pmd_t pmd)
+{
+ if (pmd_present(pmd)) {
+ return pmd_trans_huge(pmd);
+ } else if (!pmd_none(pmd)) {
+ /*
+ * Non-present PMDs must be valid huge non-present entries. We
+ * cannot assert that here due to header dependency issues.
+ */
+ return true;
+ }
+
+ return false;
+}
+
#define split_huge_pmd(__vma, __pmd, __address) \
do { \
pmd_t *____pmd = (__pmd); \
- if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \
+ if (pmd_is_huge(*____pmd)) \
__split_huge_pmd(__vma, __pmd, __address, \
false); \
} while (0)
@@ -447,19 +490,14 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
-static inline int is_swap_pmd(pmd_t pmd)
-{
- return !pmd_none(pmd) && !pmd_present(pmd);
-}
-
/* mmap_lock must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))
+ if (pmd_is_huge(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
- else
- return NULL;
+
+ return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma)
@@ -473,6 +511,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
/**
* folio_test_pmd_mappable - Can we map this folio with a PMD?
* @folio: The folio to test
+ *
+ * Return: true - @folio can be mapped, false - @folio cannot be mapped.
*/
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
@@ -481,6 +521,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);
+
extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;
@@ -524,6 +566,8 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze);
bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmdp, struct folio *folio);
+void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd,
+ struct vm_area_struct *vma, unsigned long haddr);
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -576,6 +620,11 @@ split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
VM_WARN_ON_ONCE_PAGE(1, page);
return -EINVAL;
}
+static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
+{
+ VM_WARN_ON_ONCE_PAGE(1, page);
+ return -EINVAL;
+}
static inline int split_huge_page(struct page *page)
{
VM_WARN_ON_ONCE_PAGE(1, page);
@@ -602,6 +651,7 @@ static inline int try_folio_split_to_order(struct folio *folio,
}
static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
+static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {}
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
@@ -642,10 +692,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
struct vm_area_struct *next)
{
}
-static inline int is_swap_pmd(pmd_t pmd)
-{
- return 0;
-}
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
@@ -662,6 +708,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
return 0;
}
+static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
+{
+ return 0;
+}
+
static inline bool is_huge_zero_folio(const struct folio *folio)
{
return false;
@@ -682,12 +733,6 @@ static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
return;
}
-static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
-{
- return NULL;
-}
-
static inline bool thp_migration_supported(void)
{
return false;
@@ -720,6 +765,11 @@ static inline struct folio *get_persistent_huge_zero_folio(void)
{
return NULL;
}
+
+static inline bool pmd_is_huge(pmd_t pmd)
+{
+ return false;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int split_folio_to_list_to_order(struct folio *folio,
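To show how the pmd_is_huge() helper above replaces the old is_swap_pmd() || pmd_trans_huge() pattern at call sites, here is a hedged sketch of a hypothetical PMD-level walk step (walk_one_pmd() is an invented name, not part of this diff):

#include <linux/huge_mm.h>
#include <linux/mm.h>

/*
 * Illustrative sketch only: check the PMD the same way the updated
 * split_huge_pmd() and pmd_trans_huge_lock() above do, then take the
 * PMD lock before operating on the huge (or non-present) entry.
 */
static void walk_one_pmd(struct vm_area_struct *vma, pmd_t *pmd,
			 unsigned long addr)
{
	spinlock_t *ptl;

	if (!pmd_is_huge(*pmd))
		return;		/* fall through to a PTE-level walk */

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return;		/* raced with a split; recheck at PTE level */

	/* ... handle the huge or non-present PMD entry covering @addr ... */
	spin_unlock(ptl);
}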
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8e63e46b8e1f..019a1c5281e4 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -150,8 +150,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
- struct vm_area_struct *vma,
- vm_flags_t vm_flags);
+ struct vm_area_desc *desc, vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
@@ -172,7 +171,7 @@ bool hugetlbfs_pagecache_present(struct hstate *h,
struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
-extern int sysctl_hugetlb_shm_group;
+extern int sysctl_hugetlb_shm_group __read_mostly;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
void hugetlb_bootmem_alloc(void);
@@ -275,11 +274,10 @@ void hugetlb_vma_lock_release(struct kref *kref);
long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot,
unsigned long cp_flags);
-bool is_hugetlb_entry_migration(pte_t pte);
-bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
+int hugetlb_vma_lock_alloc(struct vm_area_struct *vma);