Thread (9 messages) 9 messages, 3 authors, 2019-12-14

Re: [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED

From: Andrew Morton <akpm@linux-foundation.org>
Date: 2019-12-14 00:01:13
Also in: linux-fsdevel, linux-mm

On Tue, 10 Dec 2019 13:43:02 -0700 Jens Axboe [off-list ref] wrote:
If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the
cache instantiated for buffered writes. If new pages aren't
instantiated, we leave them alone. This provides similar semantics to
reads with RWF_UNCACHED set.
Wouid be nice to see a description of the proposed userspace API(s)
for exploiting this feature.
quoted hunk ↗ jump to hunk
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -285,6 +285,7 @@ enum positive_aop_returns {
 #define AOP_FLAG_NOFS			0x0002 /* used by filesystem to direct
 						* helper code (eg buffer layer)
 						* to clear GFP_FS from alloc */
+#define AOP_FLAG_UNCACHED		0x0004
 
 /*
  * oh the beauties of C type declarations.
@@ -3106,6 +3107,10 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *,
 				     struct kiocb *);
 
+struct pagevec;
+extern void write_drop_cached_pages(struct pagevec *pvec,
+				struct address_space *mapping);
+
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
diff --git a/mm/filemap.c b/mm/filemap.c
index fe37bd2b2630..2e36129ebe38 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+	int fgp_flags = FGP_LOCK|FGP_WRITE;
 
 	if (flags & AOP_FLAG_NOFS)
 		fgp_flags |= FGP_NOFS;
+	if (!(flags & AOP_FLAG_UNCACHED))
+		fgp_flags |= FGP_CREAT;
 
 	page = pagecache_get_page(mapping, index, fgp_flags,
 			mapping_gfp_mask(mapping));
@@ -3301,21 +3303,65 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
+/*
+ * Start writeback on the pages in pgs[], and then try and remove those pages
+ * from the page cached. Used with RWF_UNCACHED.
+ */
+void write_drop_cached_pages(struct pagevec *pvec,
+			     struct address_space *mapping)
+{
+	loff_t start, end;
+	int i;
+
+	end = 0;
+	start = LLONG_MAX;
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		loff_t off = page_offset(pvec->pages[i]);
+		if (off < start)
+			start = off;
+		if (off > end)
+			end = off;
+	}
+
+	__filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+
+		lock_page(page);
+		if (page->mapping == mapping) {
+			wait_on_page_writeback(page);
+			if (!page_has_private(page) ||
+			    try_to_release_page(page, 0))
+				remove_mapping(mapping, page);
+		}
+		unlock_page(page);
+	}
This is kinda invalidate_inode_pages2_range(), only much less so?  Why
doesn't this code need to do all the things which
invalidate_inode_pages2_range() does?  What happens if these pages are
mmapped, faulted in?  Not faulted in?

+	pagevec_release(pvec);
+}
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help