From 31b54a5e8916fdd4819880e3aed93f65ecbb47e3 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 5 Mar 2026 19:52:42 +0800 Subject: mm: memcontrol: prepare for reparenting LRU pages for lruvec lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following diagram illustrates how to ensure the safety of the folio lruvec lock when LRU folios undergo reparenting. In the folio_lruvec_lock(folio) function: rcu_read_lock(); retry: lruvec = folio_lruvec(folio); /* There is a possibility of folio reparenting at this point. */ spin_lock(&lruvec->lru_lock); if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) { /* * The wrong lruvec lock was acquired, and a retry is required. * This is because the folio resides on the parent memcg lruvec * list. */ spin_unlock(&lruvec->lru_lock); goto retry; } /* Reaching here indicates that folio_memcg() is stable. */ In the memcg_reparent_objcgs(memcg) function: spin_lock(&lruvec->lru_lock); spin_lock(&lruvec_parent->lru_lock); /* Transfer folios from the lruvec list to the parent's. */ spin_unlock(&lruvec_parent->lru_lock); spin_unlock(&lruvec->lru_lock); After acquiring the lruvec lock, it is necessary to verify whether the folio has been reparented. If reparenting has occurred, the new lruvec lock must be reacquired. During the LRU folio reparenting process, the lruvec lock will also be acquired (this will be implemented in a subsequent patch). Therefore, folio_memcg() remains unchanged while the lruvec lock is held. Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio) after the lruvec lock is acquired, the lruvec_memcg_debug() check is redundant. Hence, it is removed. This patch serves as a preparation for the reparenting of LRU folios. Link: https://lore.kernel.org/23f22cbb1419f277a3483018b32158ae2b86c666.1772711148.git.zhengqi.arch@bytedance.com Signed-off-by: Muchun Song Signed-off-by: Qi Zheng Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Allen Pais Cc: Axel Rasmussen Cc: Baoquan He Cc: Chengming Zhou Cc: Chen Ridong Cc: David Hildenbrand Cc: Hamza Mahfooz Cc: Harry Yoo Cc: Hugh Dickins Cc: Imran Khan Cc: Kamalesh Babulal Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes (Oracle) Cc: Michal Hocko Cc: Michal Koutný Cc: Mike Rapoport Cc: Muchun Song Cc: Nhat Pham Cc: Roman Gushchin Cc: Suren Baghdasaryan Cc: Usama Arif Cc: Vlastimil Babka Cc: Wei Xu Cc: Yosry Ahmed Cc: Yuanchu Xie Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 34 +++++++++++++++++----------------- include/linux/swap.h | 3 +-- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 38f94c7271c1..12982875073e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -741,7 +741,15 @@ out: * folio_lruvec - return lruvec for isolating/putting an LRU folio * @folio: Pointer to the folio. * - * This function relies on folio->mem_cgroup being stable. + * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec. + * Note that this alone will NOT guarantee the stability of the folio->lruvec + * association; the folio can be reparented to an ancestor if this races with + * cgroup deletion. + * + * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding. + * Once a lruvec is locked, folio_lruvec() can be called on other folios, and + * their binding is stable if the returned lruvec matches the one the caller has + * locked. Useful for lock batching. */ static inline struct lruvec *folio_lruvec(struct folio *folio) { @@ -764,15 +772,6 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags); -#ifdef CONFIG_DEBUG_VM -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio); -#else -static inline -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) -{ -} -#endif - static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; @@ -1198,11 +1197,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio) return &pgdat->__lruvec; } -static inline -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio) -{ -} - static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { return NULL; @@ -1261,6 +1255,7 @@ static inline struct lruvec *folio_lruvec_lock(struct folio *folio) { struct pglist_data *pgdat = folio_pgdat(folio); + rcu_read_lock(); spin_lock(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } @@ -1269,6 +1264,7 @@ static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio) { struct pglist_data *pgdat = folio_pgdat(folio); + rcu_read_lock(); spin_lock_irq(&pgdat->__lruvec.lru_lock); return &pgdat->__lruvec; } @@ -1278,6 +1274,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, { struct pglist_data *pgdat = folio_pgdat(folio); + rcu_read_lock(); spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp); return &pgdat->__lruvec; } @@ -1500,23 +1497,26 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) static inline void lruvec_lock_irq(struct lruvec *lruvec) { + rcu_read_lock(); spin_lock_irq(&lruvec->lru_lock); } static inline void lruvec_unlock(struct lruvec *lruvec) { spin_unlock(&lruvec->lru_lock); + rcu_read_unlock(); } static inline void lruvec_unlock_irq(struct lruvec *lruvec) { spin_unlock_irq(&lruvec->lru_lock); + rcu_read_unlock(); } -static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec, - unsigned long flags) +static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec, unsigned long flags) { spin_unlock_irqrestore(&lruvec->lru_lock, flags); + rcu_read_unlock(); } /* Test requires a stable folio->memcg binding, see folio_memcg() */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 4b1f13b5bbad..ea08e2afa2b4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -310,8 +310,7 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, - unsigned int nr_io, unsigned int nr_rotated) - __releases(lruvec->lru_lock); + unsigned int nr_io, unsigned int nr_rotated); void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); -- cgit v1.2.3