[RFC PATCH v4 12/12] mm: lru: use lruvec lock to serialize memcg changes
From: Muchun Song <hidden>
Date: 2021-05-27 09:35:16
Also in:
lkml
Subsystem:
control group - memory resource controller (memcg), memory management, memory management - mglru (multi-gen lru), memory management - page allocator, memory management - reclaim, memory management - swap, the rest · Maintainers:
Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt, Andrew Morton, Vlastimil Babka, Chris Li, Kairui Song, Linus Torvalds
As described by commit fc574c23558c ("mm/swap.c: serialize memcg
changes in pagevec_lru_move_fn"), TestClearPageLRU() aims to
serialize mem_cgroup_move_account() during pagevec_lru_move_fn().
Now lock_page_lruvec*() has the ability to detect whether page
memcg has been changed. So we can use lruvec lock to serialize
mem_cgroup_move_account() during pagevec_lru_move_fn(). This
change is a partial revert of the commit fc574c23558c ("mm/swap.c:
serialize memcg changes in pagevec_lru_move_fn").
And pagevec_lru_move_fn() is more hot compare with
mem_cgroup_move_account(), removing an atomic operation would be
an optimization. Also this change would not dirty cacheline for a
page which isn't on the LRU.
Signed-off-by: Muchun Song <redacted>
---
mm/compaction.c | 1 +
mm/memcontrol.c | 31 +++++++++++++++++++++++++++++++
mm/swap.c | 41 +++++++++++------------------------------
mm/vmscan.c | 9 ++++-----
4 files changed, 47 insertions(+), 35 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 225e06c63ec1..c8505542c33e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c@@ -531,6 +531,7 @@ static struct lruvec *compact_lock_page_irqsave(struct page *page, spin_lock_irqsave(&lruvec->lru_lock, *flags); out: + /* See the comments in lock_page_lruvec(). */ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) { spin_unlock_irqrestore(&lruvec->lru_lock, *flags); goto retry;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 33aad9ed5071..289524aaf629 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c@@ -1318,12 +1318,38 @@ struct lruvec *lock_page_lruvec(struct page *page) lruvec = mem_cgroup_page_lruvec(page); spin_lock(&lruvec->lru_lock); + /* + * The memcg of the page can be changed by any the following routines: + * + * 1) mem_cgroup_move_account() or + * 2) memcg_reparent_objcgs() + * + * The possible bad scenario would like: + * + * CPU0: CPU1: CPU2: + * lruvec = mem_cgroup_page_lruvec() + * + * if (!isolate_lru_page()) + * mem_cgroup_move_account() + * + * memcg_reparent_objcgs() + * + * spin_lock(&lruvec->lru_lock) + * ^^^^^^ + * wrong lock + * + * Either CPU1 or CPU2 can change page memcg, so we need to check + * whether page memcg is changed, if so, we should reacquire the + * new lruvec lock. + */ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) { spin_unlock(&lruvec->lru_lock); goto retry; } /* + * When we reach here, it means that the page_memcg(page) is stable. + * * Preemption is disabled in the internal of spin_lock, which can serve * as RCU read-side critical sections. */
@@ -1341,6 +1367,7 @@ struct lruvec *lock_page_lruvec_irq(struct page *page) lruvec = mem_cgroup_page_lruvec(page); spin_lock_irq(&lruvec->lru_lock); + /* See the comments in lock_page_lruvec(). */ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) { spin_unlock_irq(&lruvec->lru_lock); goto retry;
@@ -1361,6 +1388,7 @@ struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags) lruvec = mem_cgroup_page_lruvec(page); spin_lock_irqsave(&lruvec->lru_lock, *flags); + /* See the comments in lock_page_lruvec(). */ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) { spin_unlock_irqrestore(&lruvec->lru_lock, *flags); goto retry;
@@ -5846,7 +5874,10 @@ static int mem_cgroup_move_account(struct page *page, obj_cgroup_get(to->objcg); obj_cgroup_put(from->objcg); + /* See the comments in lock_page_lruvec(). */ + spin_lock(&from_vec->lru_lock); page->memcg_data = (unsigned long)to->objcg; + spin_unlock(&from_vec->lru_lock); __unlock_page_objcg(from->objcg);
diff --git a/mm/swap.c b/mm/swap.c
index 6260e0e11268..a254f0dcfe1d 100644
--- a/mm/swap.c
+++ b/mm/swap.c@@ -211,14 +211,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, for (i = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - /* block memcg migration during page moving between lru */ - if (!TestClearPageLRU(page)) - continue; - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); (*move_fn)(page, lruvec); - - SetPageLRU(page); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags);
@@ -228,7 +222,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec) { - if (!PageUnevictable(page)) { + if (PageLRU(page) && !PageUnevictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageActive(page); add_page_to_lru_list_tail(page, lruvec);
@@ -324,7 +318,7 @@ void lru_note_cost_page(struct page *page) static void __activate_page(struct page *page, struct lruvec *lruvec) { - if (!PageActive(page) && !PageUnevictable(page)) { + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec);
@@ -377,12 +371,9 @@ static void activate_page(struct page *page) struct lruvec *lruvec; page = compound_head(page); - if (TestClearPageLRU(page)) { - lruvec = lock_page_lruvec_irq(page); - __activate_page(page, lruvec); - unlock_page_lruvec_irq(lruvec); - SetPageLRU(page); - } + lruvec = lock_page_lruvec_irq(page); + __activate_page(page, lruvec); + unlock_page_lruvec_irq(lruvec); } #endif
@@ -537,6 +528,9 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) bool active = PageActive(page); int nr_pages = thp_nr_pages(page); + if (!PageLRU(page)) + return; + if (PageUnevictable(page)) return;
@@ -574,7 +568,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) { - if (PageActive(page) && !PageUnevictable(page)) { + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { int nr_pages = thp_nr_pages(page); del_page_from_lru_list(page, lruvec);
@@ -590,7 +584,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec) { - if (PageAnon(page) && PageSwapBacked(page) && + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { int nr_pages = thp_nr_pages(page);
@@ -1055,20 +1049,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec) */ void __pagevec_lru_add(struct pagevec *pvec) { - int i; - struct lruvec *lruvec = NULL; - unsigned long flags = 0; - - for (i = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - - lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags); - __pagevec_lru_add_fn(page, lruvec); - } - if (lruvec) - unlock_page_lruvec_irqrestore(lruvec, flags); - release_pages(pvec->pages, pvec->nr); - pagevec_reinit(pvec); + pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn); } /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5c30dffce768..a16b28da0878 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c@@ -4469,18 +4469,17 @@ void check_move_unevictable_pages(struct pagevec *pvec) nr_pages = thp_nr_pages(page); pgscanned += nr_pages; - /* block memcg migration during page moving between lru */ - if (!TestClearPageLRU(page)) + lruvec = relock_page_lruvec_irq(page, lruvec); + + if (!PageLRU(page) || !PageUnevictable(page)) continue; - lruvec = relock_page_lruvec_irq(page, lruvec); - if (page_evictable(page) && PageUnevictable(page)) { + if (page_evictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); add_page_to_lru_list(page, lruvec); pgrescued += nr_pages; } - SetPageLRU(page); } if (lruvec) {
--
2.11.0