Thread (51 messages) 51 messages, 5 authors, 2016-10-26

[PATCHv4 30/43] mm: account huge pages to dirty, writaback, reclaimable, etc.

From: Kirill A. Shutemov <hidden>
Date: 2016-10-25 00:13:29
Also in: linux-ext4, linux-fsdevel, linux-mm, lkml
Subsystem: control group - memory resource controller (memcg), filesystems (vfs and infrastructure), memory management, memory management - memory policy and migration, page cache, the rest · Maintainers: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt, Alexander Viro, Christian Brauner, Andrew Morton, David Hildenbrand, Matthew Wilcox, Linus Torvalds

We need to account huge pages according to its size to get background
writaback work properly.

Signed-off-by: Kirill A. Shutemov <redacted>
---
 fs/fs-writeback.c           | 10 ++++---
 include/linux/backing-dev.h | 10 +++++++
 include/linux/memcontrol.h  |  5 ++--
 mm/migrate.c                |  1 +
 mm/page-writeback.c         | 67 +++++++++++++++++++++++++++++----------------
 5 files changed, 64 insertions(+), 29 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5da083..2feb8677e69e 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -366,8 +366,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		struct page *page = radix_tree_deref_slot_protected(slot,
 							&mapping->tree_lock);
 		if (likely(page) && PageDirty(page)) {
-			__dec_wb_stat(old_wb, WB_RECLAIMABLE);
-			__inc_wb_stat(new_wb, WB_RECLAIMABLE);
+			int nr = hpage_nr_pages(page);
+			__add_wb_stat(old_wb, WB_RECLAIMABLE, -nr);
+			__add_wb_stat(new_wb, WB_RECLAIMABLE, nr);
 		}
 	}
 
@@ -376,9 +377,10 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		struct page *page = radix_tree_deref_slot_protected(slot,
 							&mapping->tree_lock);
 		if (likely(page)) {
+			int nr = hpage_nr_pages(page);
 			WARN_ON_ONCE(!PageWriteback(page));
-			__dec_wb_stat(old_wb, WB_WRITEBACK);
-			__inc_wb_stat(new_wb, WB_WRITEBACK);
+			__add_wb_stat(old_wb, WB_WRITEBACK, -nr);
+			__add_wb_stat(new_wb, WB_WRITEBACK, nr);
 		}
 	}
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 43b93a947e61..e63487f78824 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -61,6 +61,16 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
 	__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
+static inline void add_wb_stat(struct bdi_writeback *wb,
+				 enum wb_stat_item item, s64 amount)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__add_wb_stat(wb, item, amount);
+	local_irq_restore(flags);
+}
+
 static inline void __inc_wb_stat(struct bdi_writeback *wb,
 				 enum wb_stat_item item)
 {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 61d20c17f3b7..d24092581442 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,6 +29,7 @@
 #include <linux/mmzone.h>
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
+#include <linux/mm.h>
 
 struct mem_cgroup;
 struct page;
@@ -506,13 +507,13 @@ static inline void mem_cgroup_update_page_stat(struct page *page,
 static inline void mem_cgroup_inc_page_stat(struct page *page,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, 1);
+	mem_cgroup_update_page_stat(page, idx, hpage_nr_pages(page));
 }
 
 static inline void mem_cgroup_dec_page_stat(struct page *page,
 					    enum mem_cgroup_stat_index idx)
 {
-	mem_cgroup_update_page_stat(page, idx, -1);
+	mem_cgroup_update_page_stat(page, idx, -hpage_nr_pages(page));
 }
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
diff --git a/mm/migrate.c b/mm/migrate.c
index 99250aee1ac1..bfc722959d3e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -505,6 +505,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 * are mapped to swap space.
 	 */
 	if (newzone != oldzone) {
+		BUG_ON(PageTransHuge(page));
 		__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
 		__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
 		if (PageSwapBacked(page) && !PageSwapCache(page)) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c76fc90b7039..f903c09940c4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2421,19 +2421,22 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 
 	if (mapping_cap_account_dirty(mapping)) {
 		struct bdi_writeback *wb;
+		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
+		int nr = hpage_nr_pages(page);
 
 		inode_attach_wb(inode, page);
 		wb = inode_to_wb(inode);
 
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-		__inc_node_page_state(page, NR_FILE_DIRTY);
-		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		__inc_node_page_state(page, NR_DIRTIED);
-		__inc_wb_stat(wb, WB_RECLAIMABLE);
-		__inc_wb_stat(wb, WB_DIRTIED);
-		task_io_account_write(PAGE_SIZE);
-		current->nr_dirtied++;
-		this_cpu_inc(bdp_ratelimits);
+		__mod_node_page_state(pgdat, NR_FILE_DIRTY, nr);
+		__mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, nr);
+		__mod_node_page_state(pgdat, NR_DIRTIED, nr);
+		__add_wb_stat(wb, WB_RECLAIMABLE, nr);
+		__add_wb_stat(wb, WB_DIRTIED, nr);
+		task_io_account_write(nr * PAGE_SIZE);
+		current->nr_dirtied += nr;
+		this_cpu_add(bdp_ratelimits, nr);
 	}
 }
 EXPORT_SYMBOL(account_page_dirtied);
@@ -2447,11 +2450,15 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb)
 {
 	if (mapping_cap_account_dirty(mapping)) {
+		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
+		int nr = hpage_nr_pages(page);
+
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-		dec_node_page_state(page, NR_FILE_DIRTY);
-		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		dec_wb_stat(wb, WB_RECLAIMABLE);
-		task_io_account_cancelled_write(PAGE_SIZE);
+		mod_node_page_state(pgdat, NR_FILE_DIRTY, -nr);
+		mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+		add_wb_stat(wb, WB_RECLAIMABLE, -nr);
+		task_io_account_cancelled_write(PAGE_SIZE * nr);
 	}
 }
 
@@ -2511,14 +2518,16 @@ void account_page_redirty(struct page *page)
 	struct address_space *mapping = page->mapping;
 
 	if (mapping && mapping_cap_account_dirty(mapping)) {
+		pg_data_t *pgdat = page_pgdat(page);
+		int nr = hpage_nr_pages(page);
 		struct inode *inode = mapping->host;
 		struct bdi_writeback *wb;
 		bool locked;
 
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
-		current->nr_dirtied--;
-		dec_node_page_state(page, NR_DIRTIED);
-		dec_wb_stat(wb, WB_DIRTIED);
+		current->nr_dirtied -= nr;
+		mod_node_page_state(pgdat, NR_DIRTIED, -nr);
+		add_wb_stat(wb, WB_DIRTIED, -nr);
 		unlocked_inode_to_wb_end(inode, locked);
 	}
 }
@@ -2704,10 +2713,14 @@ int clear_page_dirty_for_io(struct page *page)
 		 */
 		wb = unlocked_inode_to_wb_begin(inode, &locked);
 		if (TestClearPageDirty(page)) {
+			struct zone *zone = page_zone(page);
+			pg_data_t *pgdat = page_pgdat(page);
+			int nr = hpage_nr_pages(page);
+
 			mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
-			dec_node_page_state(page, NR_FILE_DIRTY);
-			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-			dec_wb_stat(wb, WB_RECLAIMABLE);
+			mod_node_page_state(pgdat, NR_FILE_DIRTY, -nr);
+			mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+			add_wb_stat(wb, WB_RECLAIMABLE, -nr);
 			ret = 1;
 		}
 		unlocked_inode_to_wb_end(inode, locked);
@@ -2751,10 +2764,14 @@ int test_clear_page_writeback(struct page *page)
 		ret = TestClearPageWriteback(page);
 	}
 	if (ret) {
+		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
+		int nr = hpage_nr_pages(page);
+
 		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-		dec_node_page_state(page, NR_WRITEBACK);
-		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		inc_node_page_state(page, NR_WRITTEN);
+		mod_node_page_state(pgdat, NR_WRITEBACK, -nr);
+		mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, -nr);
+		mod_node_page_state(pgdat, NR_WRITTEN, nr);
 	}
 	unlock_page_memcg(page);
 	return ret;
@@ -2806,9 +2823,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		ret = TestSetPageWriteback(page);
 	}
 	if (!ret) {
+		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
+		int nr = hpage_nr_pages(page);
+
 		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
-		inc_node_page_state(page, NR_WRITEBACK);
-		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+		mod_node_page_state(pgdat, NR_WRITEBACK, nr);
+		mod_zone_page_state(zone, NR_ZONE_WRITE_PENDING, nr);
 	}
 	unlock_page_memcg(page);
 	return ret;
-- 
2.9.3
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help