aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgalloc.h2
-rw-r--r--include/linux/bootmem_info.h1
-rw-r--r--include/linux/compaction.h10
-rw-r--r--include/linux/damon.h136
-rw-r--r--include/linux/gfp.h4
-rw-r--r--include/linux/gfp_types.h6
-rw-r--r--include/linux/highmem-internal.h2
-rw-r--r--include/linux/huge_mm.h47
-rw-r--r--include/linux/list_lru.h70
-rw-r--r--include/linux/memcontrol.h31
-rw-r--r--include/linux/memory.h7
-rw-r--r--include/linux/memory_hotplug.h8
-rw-r--r--include/linux/mm.h62
-rw-r--r--include/linux/mm_inline.h2
-rw-r--r--include/linux/mm_types.h19
-rw-r--r--include/linux/mmu_notifier.h4
-rw-r--r--include/linux/mmzone.h23
-rw-r--r--include/linux/nodemask.h18
-rw-r--r--include/linux/page_ref.h18
-rw-r--r--include/linux/pageblock-flags.h6
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/swap.h24
-rw-r--r--include/linux/swap_cgroup.h47
-rw-r--r--include/linux/thread_info.h2
-rw-r--r--include/linux/userfaultfd_k.h40
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/linux/vmpressure.h9
-rw-r--r--include/trace/events/damon.h38
-rw-r--r--include/trace/events/vmscan.h52
29 files changed, 465 insertions, 229 deletions
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 57137d3ac159..051aa1331051 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -4,7 +4,7 @@
#ifdef CONFIG_MMU
-#define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO)
+#define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO | __GFP_SKIP_KASAN)
#define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT)
/**
diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h
index 492ceeb1cdf8..f724340755e5 100644
--- a/include/linux/bootmem_info.h
+++ b/include/linux/bootmem_info.h
@@ -82,7 +82,6 @@ static inline void get_page_bootmem(unsigned long info, struct page *page,
static inline void free_bootmem_page(struct page *page)
{
- kmemleak_free_part_phys(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
free_reserved_page(page);
}
#endif
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 173d9c07a895..f29ef0653546 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_COMPACTION_H
#define _LINUX_COMPACTION_H
+#include <linux/swap.h>
+
/*
* Determines how hard direct compaction should try to succeed.
* Lower value means higher priority, analogically to reclaim priority.
@@ -73,11 +75,9 @@ static inline unsigned long compact_gap(unsigned int order)
* effectively limited by COMPACT_CLUSTER_MAX, as that's the maximum
* that the migrate scanner can have isolated on migrate list, and free
* scanner is only invoked when the number of isolated free pages is
- * lower than that. But it's not worth to complicate the formula here
- * as a bigger gap for higher orders than strictly necessary can also
- * improve chances of compaction success.
+ * lower than that.
*/
- return 2UL << order;
+ return min(2UL << order, COMPACT_CLUSTER_MAX);
}
static inline int current_is_kcompactd(void)
@@ -101,7 +101,7 @@ extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
- int alloc_flags);
+ int alloc_flags, gfp_t gfp_mask);
extern void __meminit kcompactd_run(int nid);
extern void __meminit kcompactd_stop(int nid);
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f2cdb7c3f5e6..6f7edb3590ef 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -8,23 +8,20 @@
#ifndef _DAMON_H_
#define _DAMON_H_
+#include <linux/math64.h>
#include <linux/memcontrol.h>
#include <linux/mutex.h>
+#include <linux/prandom.h>
#include <linux/time64.h>
#include <linux/types.h>
-#include <linux/random.h>
/* Minimal region size. Every damon_region is aligned by this. */
#define DAMON_MIN_REGION_SZ PAGE_SIZE
+/* Maximum number of monitoring probes. */
+#define DAMON_MAX_PROBES (4)
/* Max priority score for DAMON-based operation schemes */
#define DAMOS_MAX_SCORE (99)
-/* Get a random number in [l, r) */
-static inline unsigned long damon_rand(unsigned long l, unsigned long r)
-{
- return l + get_random_u32_below(r - l);
-}
-
/**
* struct damon_addr_range - Represents an address region of [@start, @end).
* @start: Start address of the region (inclusive).
@@ -52,6 +49,7 @@ struct damon_size_range {
* @nr_accesses: Access frequency of this region.
* @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for
* each sampling interval.
+ * @probe_hits: Number of probe-positive region samples.
* @list: List head for siblings.
* @age: Age of this region.
*
@@ -80,6 +78,7 @@ struct damon_region {
unsigned long sampling_addr;
unsigned int nr_accesses;
unsigned int nr_accesses_bp;
+ unsigned char probe_hits[DAMON_MAX_PROBES];
struct list_head list;
unsigned int age;
@@ -121,6 +120,7 @@ struct damon_target {
* @DAMOS_PAGEOUT: Reclaim the region.
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_COLLAPSE: Call ``madvise()`` for the region with MADV_COLLAPSE.
* @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists.
* @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
* @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions.
@@ -140,6 +140,7 @@ enum damos_action {
DAMOS_PAGEOUT,
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
+ DAMOS_COLLAPSE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
DAMOS_MIGRATE_HOT,
@@ -159,6 +160,8 @@ enum damos_action {
* @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup.
* @DAMOS_QUOTA_ACTIVE_MEM_BP: Active to total LRU memory ratio.
* @DAMOS_QUOTA_INACTIVE_MEM_BP: Inactive to total LRU memory ratio.
+ * @DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP: Scheme-eligible memory ratio of a
+ * node in basis points (0-10000).
* @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics.
*
* Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
@@ -172,6 +175,7 @@ enum damos_quota_goal_metric {
DAMOS_QUOTA_NODE_MEMCG_FREE_BP,
DAMOS_QUOTA_ACTIVE_MEM_BP,
DAMOS_QUOTA_INACTIVE_MEM_BP,
+ DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP,
NR_DAMOS_QUOTA_GOAL_METRICS,
};
@@ -233,6 +237,8 @@ enum damos_quota_goal_tuner {
* @goals: Head of quota tuning goals (&damos_quota_goal) list.
* @goal_tuner: Goal-based @esz tuning algorithm to use.
* @esz: Effective size quota in bytes.
+ * @fail_charge_num: Failed regions charge rate numerator.
+ * @fail_charge_denom: Failed regions charge rate denominator.
*
* @weight_sz: Weight of the region's size for prioritization.
* @weight_nr_accesses: Weight of the region's nr_accesses for prioritization.
@@ -262,6 +268,10 @@ enum damos_quota_goal_tuner {
*
* The resulting effective size quota in bytes is set to @esz.
*
+ * For DAMOS action applying failed amount of regions, charging those same to
+ * those that the action has successfully applied may be unfair. For the
+ * reason, 'the size * @fail_charge_num / @fail_charge_denom' is charged.
+ *
* For selecting regions within the quota, DAMON prioritizes current scheme's
* target memory regions using the &struct damon_operations->get_scheme_score.
* You could customize the prioritization logic by setting &weight_sz,
@@ -276,6 +286,9 @@ struct damos_quota {
enum damos_quota_goal_tuner goal_tuner;
unsigned long esz;
+ unsigned int fail_charge_num;
+ unsigned int fail_charge_denom;
+
unsigned int weight_sz;
unsigned int weight_nr_accesses;
unsigned int weight_age;
@@ -617,6 +630,7 @@ enum damon_ops_id {
* @update: Update operations-related data structures.
* @prepare_access_checks: Prepare next access check of target regions.
* @check_accesses: Check the accesses to target regions.
+ * @apply_probes: Apply probes for each region.
* @get_scheme_score: Get the score of a region for a scheme.
* @apply_scheme: Apply a DAMON-based operation scheme.
* @target_valid: Determine if the target is valid.
@@ -643,6 +657,8 @@ enum damon_ops_id {
* last preparation and update the number of observed accesses of each region.
* It should also return max number of observed accesses that made as a result
* of its update. The value will be used for regions adjustment threshold.
+ * @apply_probes should apply the data attribute probes to each region and
+ * accordingly update the probe hits counter of the region.
* @get_scheme_score should return the priority score of a region for a scheme
* as an integer in [0, &DAMOS_MAX_SCORE].
* @apply_scheme is called from @kdamond when a region for user provided
@@ -660,6 +676,7 @@ struct damon_operations {
void (*update)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
unsigned int (*check_accesses)(struct damon_ctx *context);
+ void (*apply_probes)(struct damon_ctx *context);
int (*get_scheme_score)(struct damon_ctx *context,
struct damon_region *r, struct damos *scheme);
unsigned long (*apply_scheme)(struct damon_ctx *context,
@@ -722,6 +739,47 @@ struct damon_intervals_goal {
};
/**
+ * enum damon_filter_type - Type of &struct damon_filter
+ *
+ * @DAMON_FILTER_TYPE_ANON: Anonymous pages.
+ * @DAMON_FILTER_TYPE_MEMCG: Specific memcg's pages.
+ */
+enum damon_filter_type {
+ DAMON_FILTER_TYPE_ANON,
+ DAMON_FILTER_TYPE_MEMCG,
+};
+
+/**
+ * struct damon_filter - DAMON region filter for &struct damon_probe.
+ *
+ * @type: Type of the region.
+ * @matching: Whether this filter is for the type-matching ones.
+ * @allow: Whether the @type-@matching ones should pass this filter.
+ * @memcg_id: Memcg id of the question if @type is DAMON_FILTER_MEMCG.
+ * @list: Siblings list.
+ */
+struct damon_filter {
+ enum damon_filter_type type;
+ bool matching;
+ bool allow;
+ union {
+ u64 memcg_id;
+ };
+ struct list_head list;
+};
+
+/**
+ * struct damon_probe - Data region attribute probe.
+ *
+ * @filters: Filters for assessing if a given region is for this probe.
+ * @list: Siblings list.
+ */
+struct damon_probe {
+ struct list_head filters;
+ struct list_head list;
+};
+
+/**
* struct damon_attrs - Monitoring attributes for accuracy/overhead control.
*
* @sample_interval: The time between access samplings.
@@ -787,6 +845,7 @@ struct damon_attrs {
* @ops: Set of monitoring operations for given use cases.
* @addr_unit: Scale factor for core to ops address conversion.
* @min_region_sz: Minimum region size.
+ * @pause: Pause kdamond main loop.
* @adaptive_targets: Head of monitoring targets (&damon_target) list.
* @schemes: Head of schemes (&damos) list.
*/
@@ -838,13 +897,34 @@ struct damon_ctx {
/* public: */
struct damon_operations ops;
+ struct list_head probes;
unsigned long addr_unit;
unsigned long min_region_sz;
+ bool pause;
struct list_head adaptive_targets;
struct list_head schemes;
+
+ /* Per-ctx PRNG state for damon_rand(); kdamond is the sole consumer. */
+ struct rnd_state rnd_state;
};
+/* Get a random number in [@l, @r) using @ctx's lockless PRNG. */
+static inline unsigned long damon_rand(struct damon_ctx *ctx,
+ unsigned long l, unsigned long r)
+{
+ unsigned long span = r - l;
+ u64 rnd;
+
+ if (span <= U32_MAX) {
+ rnd = prandom_u32_state(&ctx->rnd_state);
+ return l + (unsigned long)((rnd * span) >> 32);
+ }
+ rnd = ((u64)prandom_u32_state(&ctx->rnd_state) << 32) |
+ prandom_u32_state(&ctx->rnd_state);
+ return l + mul_u64_u64_shr(rnd, span, 64);
+}
+
static inline struct damon_region *damon_next_region(struct damon_region *r)
{
return container_of(r->list.next, struct damon_region, list);
@@ -870,15 +950,26 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
return r->ar.end - r->ar.start;
}
+#define damon_for_each_filter(f, p) \
+ list_for_each_entry(f, &(p)->filters, list)
+
+#define damon_for_each_filter_safe(f, next, p) \
+ list_for_each_entry_safe(f, next, &(p)->filters, list)
+
+#define damon_for_each_probe(p, ctx) \
+ list_for_each_entry(p, &(ctx)->probes, list)
+
+#define damon_for_each_probe_safe(p, next, ctx) \
+ list_for_each_entry_safe(p, next, &(ctx)->probes, list)
#define damon_for_each_region(r, t) \
- list_for_each_entry(r, &t->regions_list, list)
+ list_for_each_entry(r, &(t)->regions_list, list)
#define damon_for_each_region_from(r, t) \
- list_for_each_entry_from(r, &t->regions_list, list)
+ list_for_each_entry_from(r, &(t)->regions_list, list)
#define damon_for_each_region_safe(r, next, t) \
- list_for_each_entry_safe(r, next, &t->regions_list, list)
+ list_for_each_entry_safe(r, next, &(t)->regions_list, list)
#define damon_for_each_target(t, ctx) \
list_for_each_entry(t, &(ctx)->adaptive_targets, list)
@@ -893,7 +984,7 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
#define damos_for_each_quota_goal(goal, quota) \
- list_for_each_entry(goal, &quota->goals, list)
+ list_for_each_entry(goal, &(quota)->goals, list)
#define damos_for_each_quota_goal_safe(goal, next, quota) \
list_for_each_entry_safe(goal, next, &(quota)->goals, list)
@@ -912,21 +1003,16 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
#ifdef CONFIG_DAMON
-struct damon_region *damon_new_region(unsigned long start, unsigned long end);
+struct damon_filter *damon_new_filter(enum damon_filter_type type,
+ bool matching, bool allow);
+void damon_add_filter(struct damon_probe *probe, struct damon_filter *f);
+void damon_destroy_filter(struct damon_filter *f);
-/*
- * Add a region between two other regions
- */
-static inline void damon_insert_region(struct damon_region *r,
- struct damon_region *prev, struct damon_region *next,
- struct damon_target *t)
-{
- __list_add(&r->list, &prev->list, &next->list);
- t->nr_regions++;
-}
+struct damon_probe *damon_new_probe(void);
+void damon_add_probe(struct damon_ctx *ctx, struct damon_probe *probe);
+
+struct damon_region *damon_new_region(unsigned long start, unsigned long end);
-void damon_add_region(struct damon_region *r, struct damon_target *t);
-void damon_destroy_region(struct damon_region *r, struct damon_target *t);
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges, unsigned long min_region_sz);
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
@@ -994,7 +1080,7 @@ int damon_kdamond_pid(struct damon_ctx *ctx);
int damon_call(struct damon_ctx *ctx, struct damon_call_control *control);
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control);
-int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+int damon_set_region_system_rams_default(struct damon_target *t,
unsigned long *start, unsigned long *end,
unsigned long addr_unit,
unsigned long min_region_sz);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51ef13ed756e..cdf95a9f0b87 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -239,6 +239,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
struct page **page_array);
#define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__))
+void free_pages_bulk(struct page **page_array, unsigned long nr_pages);
+
unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp,
unsigned long nr_pages,
struct page **page_array);
@@ -467,6 +469,8 @@ void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages);
void free_contig_range(unsigned long pfn, unsigned long nr_pages);
#endif
+void __free_contig_range(unsigned long pfn, unsigned long nr_pages);
+
DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
#endif /* __LINUX_GFP_H */
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index cd4972a7c97c..54ca0c88bab6 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -281,9 +281,9 @@ enum {
*
* %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
* Used for userspace and vmalloc pages; the latter are unpoisoned by
- * kasan_unpoison_vmalloc instead. For userspace pages, results in
- * poisoning being skipped as well, see should_skip_kasan_poison for
- * details. Only effective in HW_TAGS mode.
+ * kasan_unpoison_vmalloc instead. If passed to vmalloc, kasan_unpoison_vmalloc
+ * is skipped too. For userspace pages, results in poisoning being skipped as
+ * well, see should_skip_kasan_poison for details. Only effective in HW_TAGS mode.
*/
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 0574c21ca45d..bb71e7dba4f7 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -262,7 +262,7 @@ static inline bool is_kmap_addr(const void *x)
* @__addr: Virtual address to be unmapped
*
* Unmaps an address previously mapped by kmap_atomic() and re-enables
- * pagefaults. Depending on PREEMP_RT configuration, re-enables also
+ * pagefaults. Depending on PREEMPT_RT configuration, re-enables also
* migration and preemption. Users should not count on these side effects.
*
* Mappings should be unmapped in the reverse order that they were mapped.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2949e5acff35..c0d223d0c556 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -238,6 +238,31 @@ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
}
/*
+ * Make sure huge_gfp is always more limited than limit_gfp.
+ * Some shmem users want THP allocation to be done less aggressively
+ * and only in certain zone.
+ */
+static inline gfp_t thp_shmem_limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
+{
+ gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
+ gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
+ gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
+ gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
+
+ /* Allow allocations only from the originally specified zones. */
+ result |= zoneflags;
+
+ /*
+ * Minimize the result gfp by taking the union with the deny flags,
+ * and the intersection of the allow flags.
+ */
+ result |= (limit_gfp & denyflags);
+ result |= (huge_gfp & limit_gfp) & allowflags;
+
+ return result;
+}
+
+/*
* Filter the bitfield of input orders to the ones suitable for use in the vma.
* See thp_vma_suitable_order().
* All orders that pass the checks are returned as a bitfield.
@@ -414,10 +439,10 @@ static inline int split_huge_page(struct page *page)
{
return split_huge_page_to_list_to_order(page, NULL, 0);
}
+
+int folio_memcg_alloc_deferred(struct folio *folio);
+
void deferred_split_folio(struct folio *folio, bool partially_mapped);
-#ifdef CONFIG_MEMCG
-void reparent_deferred_split_queue(struct mem_cgroup *memcg);
-#endif
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze);
@@ -581,6 +606,11 @@ static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
return false;
}
+static inline gfp_t thp_shmem_limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
+{
+ return huge_gfp;
+}
+
static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
unsigned long addr, unsigned long orders)
{
@@ -649,8 +679,15 @@ static inline int try_folio_split_to_order(struct folio *folio,
return -EINVAL;
}
-static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
-static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {}
+static inline int folio_memcg_alloc_deferred(struct folio *folio)
+{
+ return 0;
+}
+
+static inline void deferred_split_folio(struct folio *folio, bool partially_mapped)
+{
+}
+
#define split_huge_pmd(__vma, __pmd, __address) \
do { } while (0)
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index fe739d35a864..a450fffe1550 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -81,9 +81,76 @@ static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
gfp_t gfp);
+
+#ifdef CONFIG_MEMCG
+/**
+ * folio_memcg_list_lru_alloc - allocate list_lru heads for shrinkable folio
+ * @folio: the newly allocated & charged folio
+ * @lru: the list_lru this might be queued on
+ * @gfp: gfp mask
+ *
+ * Allocate list_lru heads (per-memcg, per-node) needed to queue this
+ * particular folio down the line.
+ *
+ * This does memcg_list_lru_alloc(), but on the memcg that @folio is
+ * associated with. Handles folio_memcg() access rules in the fast
+ * path (list_lru heads allocated) and the allocation slowpath.
+ *
+ * Returns 0 on success, a negative error value otherwise.
+ */
+int folio_memcg_list_lru_alloc(struct folio *folio, struct list_lru *lru,
+ gfp_t gfp);
+#else
+static inline int folio_memcg_list_lru_alloc(struct folio *folio,
+ struct list_lru *lru, gfp_t gfp)
+{
+ return 0;
+}
+#endif
+
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
/**
+ * list_lru_lock: lock the sublist for the given node and memcg
+ * @lru: the lru pointer
+ * @nid: the node id of the sublist to lock.
+ * @memcg: pointer to the cgroup of the sublist to lock. On return,
+ * updated to the cgroup whose sublist was actually locked,
+ * which may be an ancestor if the original memcg was dying.
+ *
+ * Returns the locked list_lru_one sublist. The caller must call
+ * list_lru_unlock() when done.
+ *
+ * You must ensure that the memcg is not freed during this call (e.g., with
+ * rcu or by taking a css refcnt).
+ *
+ * Return: the locked list_lru_one, or NULL on failure
+ */
+struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
+ struct mem_cgroup **memcg);
+
+/**
+ * list_lru_unlock: unlock a sublist locked by list_lru_lock()
+ * @l: the list_lru_one to unlock
+ */
+void list_lru_unlock(struct list_lru_one *l);
+
+struct list_lru_one *list_lru_lock_irq(struct list_lru *lru, int nid,
+ struct mem_cgroup **memcg);
+void list_lru_unlock_irq(struct list_lru_one *l);
+
+struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
+ struct mem_cgroup **memcg, unsigned long *irq_flags);
+void list_lru_unlock_irqrestore(struct list_lru_one *l,
+ unsigned long *irq_flags);
+
+/* Caller-locked variants, see list_lru_add() etc for documentation */
+bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid, struct mem_cgroup *memcg);
+bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid);
+
+/**
* list_lru_add: add an element to the lru list's tail
* @lru: the lru pointer
* @item: the item to be added.
@@ -115,6 +182,9 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
struct mem_cgroup *memcg);
+bool list_lru_add_irq(struct list_lru *lru, struct list_head *item, int nid,
+ struct mem_cgroup *memcg);
+
/**
* list_lru_add_obj: add an element to the lru list's tail
* @lru: the lru pointer
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dc3fa687759b..e1f46a0016fc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,6 +29,7 @@ struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
+struct swap_cluster_info;
/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
@@ -277,10 +278,6 @@ struct mem_cgroup {
struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT];
#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- struct deferred_split deferred_split_queue;
-#endif
-
#ifdef CONFIG_LRU_GEN_WALKS_MMU
/* per-memcg mm_struct list */
struct lru_gen_mm_list mm_list;
@@ -646,8 +643,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
-int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
- gfp_t gfp, swp_entry_t entry);
+int mem_cgroup_swapin_charge_folio(struct folio *folio, unsigned short id,
+ struct mm_struct *mm, gfp_t gfp);
void __mem_cgroup_uncharge(struct folio *folio);
@@ -1137,7 +1134,7 @@ static inline int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp)
}
static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
- struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
+ unsigned short id, struct mm_struct *mm, gfp_t gfp)
{
return 0;
}
@@ -1899,9 +1896,6 @@ static inline void mem_cgroup_exit_user_fault(void)
current->in_user_fault = 0;
}
-void memcg1_swapout(struct folio *folio, swp_entry_t entry);
-void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages);
-
#else /* CONFIG_MEMCG_V1 */
static inline
unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -1929,14 +1923,23 @@ static inline void mem_cgroup_exit_user_fault(void)
{
}
-static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry)
+#endif /* CONFIG_MEMCG_V1 */
+
+#if defined(CONFIG_MEMCG_V1) && defined(CONFIG_SWAP)
+
+void __memcg1_swapout(struct folio *folio, struct swap_cluster_info *ci);
+void memcg1_swapin(struct folio *folio);
+
+#else
+
+static inline void __memcg1_swapout(struct folio *folio,
+ struct swap_cluster_info *ci)
{
}
-static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
+static inline void memcg1_swapin(struct folio *folio)
{
}
-
-#endif /* CONFIG_MEMCG_V1 */
+#endif
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 5bb5599c6b2b..463dc02f6cff 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -158,7 +158,11 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(enum memory_block_state state, void *v);
-extern struct memory_block *find_memory_block(unsigned long section_nr);
+struct memory_block *memory_block_get(unsigned long block_id);
+static inline void memory_block_put(struct memory_block *mem)
+{
+ put_device(&mem->dev);
+}
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
@@ -171,7 +175,6 @@ struct memory_group *memory_group_find_by_id(int mgid);
typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
struct memory_group *excluded, void *arg);
-struct memory_block *find_memory_block_by_id(unsigned long block_id);
#define hotplug_memory_notifier(fn, pri) ({ \
static __meminitdata struct notifier_block fn##_mem_nb =\
{ .notifier_call = fn, .priority = pri };\
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 815e908c4135..7c9d66729c60 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -135,9 +135,10 @@ static inline bool movable_node_is_enabled(void)
return movable_node_enabled;
}
-extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);
+extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap);
extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
- struct vmem_altmap *altmap);
+ struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -307,7 +308,8 @@ extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
- struct vmem_altmap *altmap);
+ struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap);
extern struct zone *zone_for_pfn_range(enum mmop online_type,
int nid, struct memory_group *group, unsigned long start_pfn,
unsigned long nr_pages);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fc2acedf0b76..485df9c2dbdd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -496,6 +496,21 @@ enum {
#else
#define VM_UFFD_MINOR VM_NONE
#endif
+
+/*
+ * vma_flags_t masks for the userfaultfd VMA flags. VMA_UFFD_MINOR is gated on
+ * the same config as VM_UFFD_MINOR -- which implies 64BIT, where the bit fits
+ * -- so an out-of-range bit is never fed to mk_vma_flags() on a build whose
+ * bitmap cannot hold it.
+ */
+#define VMA_UFFD_MISSING mk_vma_flags(VMA_UFFD_MISSING_BIT)
+#define VMA_UFFD_WP mk_vma_flags(VMA_UFFD_WP_BIT)
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VMA_UFFD_MINOR mk_vma_flags(VMA_UFFD_MINOR_BIT)
+#else
+#define VMA_UFFD_MINOR EMPTY_VMA_FLAGS
+#endif
+
#ifdef CONFIG_64BIT
#define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
#define VM_SEALED INIT_VM_FLAG(SEALED)
@@ -1238,6 +1253,30 @@ static __always_inline void vma_flags_set_mask(vma_flags_t *flags,
#define vma_flags_set(flags, ...) \
vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
+static __always_inline vma_flags_t __mk_vma_flags_from_masks(size_t count,
+ const vma_flags_t *masks)
+{
+ vma_flags_t flags = EMPTY_VMA_FLAGS;
+ size_t i;
+
+ for (i = 0; i < count; i++)
+ vma_flags_set_mask(&flags, m