Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations
From: David Rientjes <hidden>
Date: 2020-01-14 00:45:42
Also in:
linux-kselftest, linux-mm, lkml
On Mon, 13 Jan 2020, Mike Kravetz wrote:
quoted
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 35415af9ed26f..b03270b0d5833 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c@@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) int idx; for (idx = 0; idx < hugetlb_max_hstate; idx++) { - if (page_counter_read(&h_cg->hugepage[idx])) + if (page_counter_read( + hugetlb_cgroup_get_counter(h_cg, idx, true)) || + page_counter_read( + hugetlb_cgroup_get_counter(h_cg, idx, false))) { return true; + } } return false; }@@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, int idx; for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) { - struct page_counter *counter = &h_cgroup->hugepage[idx]; - struct page_counter *parent = NULL; + struct page_counter *fault_parent = NULL; + struct page_counter *reserved_parent = NULL; unsigned long limit; int ret; - if (parent_h_cgroup) - parent = &parent_h_cgroup->hugepage[idx]; - page_counter_init(counter, parent); + if (parent_h_cgroup) { + fault_parent = hugetlb_cgroup_get_counter( + parent_h_cgroup, idx, false); + reserved_parent = hugetlb_cgroup_get_counter( + parent_h_cgroup, idx, true); + } + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx, + false), + fault_parent); + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx, + true), + reserved_parent); limit = round_down(PAGE_COUNTER_MAX, 1 << huge_page_order(&hstates[idx])); - ret = page_counter_set_max(counter, limit); + + ret = page_counter_set_max( + hugetlb_cgroup_get_counter(h_cgroup, idx, false), + limit); + ret = page_counter_set_max( + hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit); VM_BUG_ON(ret);The second page_counter_set_max() call overwrites ret before the check in VM_BUG_ON().quoted
} }@@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) kfree(h_cgroup); } - /* * Should be called with hugetlb_lock held. * Since we are holding hugetlb_lock, pages cannot get moved from@@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); - page_hcg = hugetlb_cgroup_from_page(page); + page_hcg = hugetlb_cgroup_from_page(page, false); /* * We can have pages in active list without any cgroup * ie, hugepage with less than 3 pages. We can safely@@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, /* Take the pages off the local counter */ page_counter_cancel(counter, nr_pages); - set_hugetlb_cgroup(page, parent); + set_hugetlb_cgroup(page, parent, false); out: return; }@@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, } int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, - struct hugetlb_cgroup **ptr) + struct hugetlb_cgroup **ptr, bool reserved) { int ret = 0; struct page_counter *counter;@@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, } rcu_read_unlock(); - if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, - &counter)) { + if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx, + reserved), + nr_pages, &counter)) { ret = -ENOMEM; hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx, HUGETLB_MAX); + css_put(&h_cg->css); + goto done; } - css_put(&h_cg->css); + /* Reservations take a reference to the css because they do not get + * reparented.I'm hoping someone with more cgroup knowledge can comment on this and any consequences of not reparenting reservations. We previously talked about why reparenting would be very difficult/expensive. I understand why you are nopt doing it. Just do not fully understand what needs to be done from the cgroup side.
I don't see any description of how hugetlb_cgroup currently acts wrt reparenting in the last patch in the series and how this is the same or different for reservations. I think the discussion that is referenced here is probably lost in some previous posting of the series. I think it's particularly useful information that the end user will need to know about for its handling so it would benefit from some documentation in the last patch.