Re: [PATCH v2 07/12] s390: add pte_free_defer() for pgtables sharing page
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: 2023-06-30 13:40:13
Also in:
linux-mm, linux-s390, lkml, sparclinux
On Tue, 20 Jun 2023 00:51:19 -0700 (PDT) Hugh Dickins [off-list ref] wrote: [...]
quoted hunk ↗ jump to hunk
@@ -407,6 +429,77 @@ void __tlb_remove_table(void *_table) __free_page(page); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void pte_free_now0(struct rcu_head *head); +static void pte_free_now1(struct rcu_head *head); + +static void pte_free_pgste(struct rcu_head *head) +{ + unsigned long *table; + struct page *page; + + page = container_of(head, struct page, rcu_head); + table = (unsigned long *)page_to_virt(page); + table = (unsigned long *)((unsigned long)table | 0x03U); + __tlb_remove_table(table); +} + +static void pte_free_half(struct rcu_head *head, unsigned int bit) +{ + unsigned long *table; + struct page *page; + unsigned int mask; + + page = container_of(head, struct page, rcu_head); + mask = atomic_xor_bits(&page->_refcount, 0x04U << (bit + 24)); + + table = (unsigned long *)page_to_virt(page); + table += bit * PTRS_PER_PTE; + table = (unsigned long *)((unsigned long)table | (0x01U << bit)); + __tlb_remove_table(table); + + /* If pte_free_defer() of the other half came in, queue it now */ + if (mask & 0x0CU) + call_rcu(&page->rcu_head, bit ? pte_free_now0 : pte_free_now1); +} + +static void pte_free_now0(struct rcu_head *head) +{ + pte_free_half(head, 0); +} + +static void pte_free_now1(struct rcu_head *head) +{ + pte_free_half(head, 1); +} + +void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) +{ + unsigned int bit, mask; + struct page *page; + + page = virt_to_page(pgtable); + if (mm_alloc_pgste(mm)) { + call_rcu(&page->rcu_head, pte_free_pgste);
So is this now going to be used to free page tables instead of page_table_free_rcu()? Or will it be used instead of page_table_free()? This is actually quite important for KVM on s390.
quoted hunk ↗ jump to hunk
+ return; + } + bit = ((unsigned long)pgtable & ~PAGE_MASK) / + (PTRS_PER_PTE * sizeof(pte_t)); + + spin_lock_bh(&mm_pgtable_list_lock); + mask = atomic_xor_bits(&page->_refcount, 0x15U << (bit + 24)); + mask >>= 24; + /* Other half not allocated? Other half not already pending free? */ + if ((mask & 0x03U) == 0x00U && (mask & 0x30U) != 0x30U) + list_del(&page->lru); + spin_unlock_bh(&mm_pgtable_list_lock); + + /* Do not relink on rcu_head if other half already linked on rcu_head */ + if ((mask & 0x0CU) != 0x0CU) + call_rcu(&page->rcu_head, bit ? pte_free_now1 : pte_free_now0); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * Base infrastructure required to generate basic asces, region, segment, * and page tables that do not make use of enhanced features like EDAT1.diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 306a3d1a0fa6..1667a1bdb8a8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h@@ -146,7 +146,7 @@ struct page { pgtable_t pmd_huge_pte; /* protected by page->ptl */ unsigned long _pt_pad_2; /* mapping */ union { - struct mm_struct *pt_mm; /* x86 pgds only */ + struct mm_struct *pt_mm; /* x86 pgd, s390 */ atomic_t pt_frag_refcount; /* powerpc */ }; #if ALLOC_SPLIT_PTLOCKS