Thread (23 messages) 23 messages, 2 authors, 2024-09-05

Re: [PATCH net-next v17 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

From: Alexander Duyck <hidden>
Date: 2024-09-04 16:15:34
Also in: linux-mm, lkml

On Mon, Sep 2, 2024 at 5:09 AM Yunsheng Lin [off-list ref] wrote:
quoted hunk ↗ jump to hunk
Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before begin the replacing, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
And page address & pfmemalloc & order is unchanged for the
same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Alexander Duyck <redacted>
Signed-off-by: Yunsheng Lin <redacted>
---
 include/linux/mm_types_task.h   | 19 +++++-----
 include/linux/page_frag_cache.h | 47 ++++++++++++++++++++++--
 mm/page_frag_cache.c            | 63 +++++++++++++++++++++------------
 3 files changed, 96 insertions(+), 33 deletions(-)
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index cdc1e3696439..73a574a0e8f9 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -50,18 +50,21 @@ struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-       void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+       /* encoded_page consists of the virtual address, pfmemalloc bit and
+        * order of a page.
+        */
+       unsigned long encoded_page;
+
+       /* we maintain a pagecount bias, so that we dont dirty cache line
+        * containing page->_refcount every time we allocate a fragment.
+        */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
        __u16 offset;
-       __u16 size;
+       __u16 pagecnt_bias;
 #else
        __u32 offset;
+       __u32 pagecnt_bias;
 #endif
-       /* we maintain a pagecount bias, so that we dont dirty cache line
-        * containing page->_refcount every time we allocate a fragment.
-        */
-       unsigned int            pagecnt_bias;
-       bool pfmemalloc;
 };

 /* Track pages that require TLB flushes */
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 0a52f7a179c8..cb89cd792fcc 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -3,18 +3,61 @@
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H

+#include <linux/bits.h>
 #include <linux/log2.h>
+#include <linux/mm.h>
 #include <linux/mm_types_task.h>
 #include <linux/types.h>

+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+/* Use a full byte here to enable assembler optimization as the shift
+ * operation is usually expecting a byte.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
+#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       8
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
+#else
+/* Compiler should be able to figure out we don't read things as any value
+ * ANDed with 0 is 0.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK             0
+#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       0
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
+#endif
+
+static inline unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
+{
+       return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
+}
+
+static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
+{
+       return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
+static inline void *page_frag_encoded_page_address(unsigned long encoded_page)
+{
+       return (void *)(encoded_page & PAGE_MASK);
+}
+
+static inline struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
+{
+       return virt_to_page((void *)encoded_page);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-       nc->va = NULL;
+       nc->encoded_page = 0;
 }

 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-       return !!nc->pfmemalloc;
+       return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
+}
+
+static inline unsigned int page_frag_cache_page_size(unsigned long encoded_page)
+{
+       return PAGE_SIZE << page_frag_encoded_page_order(encoded_page);
 }

 void page_frag_cache_drain(struct page_frag_cache *nc);
Still not a huge fan of adding all these functions that expose the
internals. It might be better to just place them in page_frag_cache.c
and pull them out to the .h file as needed.
quoted hunk ↗ jump to hunk
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 4c8e04379cb3..a5c5373cb70e 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -12,16 +12,28 @@
  * be used in the "frags" portion of skb_shared_info.
  */

+#include <linux/build_bug.h>
 #include <linux/export.h>
 #include <linux/gfp_types.h>
 #include <linux/init.h>
-#include <linux/mm.h>
 #include <linux/page_frag_cache.h>
 #include "internal.h"

+static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
+                                          bool pfmemalloc)
+{
+       BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
+       BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
+
+       return (unsigned long)page_address(page) |
+               (order & PAGE_FRAG_CACHE_ORDER_MASK) |
+               ((unsigned long)pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
+}
+
 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
                                             gfp_t gfp_mask)
 {
+       unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
        struct page *page = NULL;
        gfp_t gfp = gfp_mask;
@@ -30,23 +42,31 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
                   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
        page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
                                PAGE_FRAG_CACHE_MAX_ORDER);
-       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
 #endif
-       if (unlikely(!page))
+       if (unlikely(!page)) {
                page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+               if (unlikely(!page)) {
+                       nc->encoded_page = 0;
+                       return NULL;
+               }
I would recommend just skipping the conditional here. No need to do
that. You can basically just not encode the page below if you failed
to allocate it.
+
+               order = 0;
+       }

-       nc->va = page ? page_address(page) : NULL;
+       nc->encoded_page = page_frag_encode_page(page, order,
+                                                page_is_pfmemalloc(page));
I would just follow the same logic with the ternary operator here. If
page is set then encode the page, else just set it to 0.
quoted hunk ↗ jump to hunk
        return page;
 }

 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-       if (!nc->va)
+       if (!nc->encoded_page)
                return;

-       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-       nc->va = NULL;
+       __page_frag_cache_drain(page_frag_encoded_page_ptr(nc->encoded_page),
+                               nc->pagecnt_bias);
+       nc->encoded_page = 0;
 }
 EXPORT_SYMBOL(page_frag_cache_drain);
@@ -63,31 +83,27 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
                              unsigned int fragsz, gfp_t gfp_mask,
                              unsigned int align_mask)
 {
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       unsigned int size = nc->size;
-#else
-       unsigned int size = PAGE_SIZE;
-#endif
-       unsigned int offset;
+       unsigned long encoded_page = nc->encoded_page;
+       unsigned int size, offset;
        struct page *page;

-       if (unlikely(!nc->va)) {
+       size = page_frag_cache_page_size(encoded_page);
+
+       if (unlikely(!encoded_page)) {
 refill:
                page = __page_frag_cache_refill(nc, gfp_mask);
                if (!page)
                        return NULL;

-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
+               encoded_page = nc->encoded_page;
+               size = page_frag_cache_page_size(encoded_page);
+
                /* Even if we own the page, we do not use atomic_set().
                 * This would break get_page_unless_zero() users.
                 */
                page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);

                /* reset page count bias and offset to start of new frag */
-               nc->pfmemalloc = page_is_pfmemalloc(page);
                nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
                nc->offset = 0;
        }
It would probably make sense to move the getting of the size to just
after this if statement since you are doing it in two different paths
and I don't think you use size at all in the
"if(unlikely(!encoded_page))" path otherwise.
quoted hunk ↗ jump to hunk
@@ -107,13 +123,14 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
                        return NULL;
                }

-               page = virt_to_page(nc->va);
+               page = page_frag_encoded_page_ptr(encoded_page);

                if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
                        goto refill;

-               if (unlikely(nc->pfmemalloc)) {
-                       free_unref_page(page, compound_order(page));
+               if (unlikely(page_frag_encoded_page_pfmemalloc(encoded_page))) {
+                       free_unref_page(page,
+                                       page_frag_encoded_page_order(encoded_page));
                        goto refill;
                }
@@ -128,7 +145,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
        nc->pagecnt_bias--;
        nc->offset = offset + fragsz;

-       return nc->va + offset;
+       return page_frag_encoded_page_address(encoded_page) + offset;
 }
 EXPORT_SYMBOL(__page_frag_alloc_align);

--
2.33.0
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help