[PATCH 41/49] mm: sched: numa: Control enabling and disabling of NUMA balancing
From: Mel Gorman <mgorman@suse.de>
Date: 2012-12-07 10:25:13
Also in:
lkml
Subsystem:
documentation, memory management, memory management - memory policy and migration, scheduler, the rest · Maintainers:
Jonathan Corbet, Andrew Morton, David Hildenbrand, Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot, Linus Torvalds
This patch adds Kconfig options and kernel parameters to allow the enabling and disabling of automatic NUMA balancing. The existence of such a switch was and is very important when debugging problems related to transparent hugepages and we should have the same for automatic NUMA placement. Signed-off-by: Mel Gorman <mgorman@suse.de> --- Documentation/kernel-parameters.txt | 3 +++ include/linux/sched.h | 4 +++ init/Kconfig | 8 ++++++ kernel/sched/core.c | 48 ++++++++++++++++++++++++----------- kernel/sched/fair.c | 3 +++ kernel/sched/features.h | 6 +++-- mm/mempolicy.c | 46 +++++++++++++++++++++++++++++++++ 7 files changed, 101 insertions(+), 17 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9776f06..d984acb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt@@ -403,6 +403,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. atkbd.softrepeat= [HW] Use software keyboard repeat + balancenuma= [KNL,X86] Enable or disable automatic NUMA balancing. + Allowed values are enable and disable + baycom_epp= [HW,AX25] Format: <io>,<mode>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1068afd..2669bdd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h@@ -1563,10 +1563,14 @@ struct task_struct { #ifdef CONFIG_BALANCE_NUMA extern void task_numa_fault(int node, int pages, bool migrated); +extern void set_balancenuma_state(bool enabled); #else static inline void task_numa_fault(int node, int pages, bool migrated) { } +static inline void set_balancenuma_state(bool enabled) +{ +} #endif /*
diff --git a/init/Kconfig b/init/Kconfig
index 6897a05..4cccc00f 100644
--- a/init/Kconfig
+++ b/init/Kconfig@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE depends on ARCH_WANTS_PROT_NUMA_PROT_NONE depends on BALANCE_NUMA +config BALANCE_NUMA_DEFAULT_ENABLED + bool "Automatically enable NUMA aware memory/task placement" + default y + depends on BALANCE_NUMA + help + If set, automatic NUMA balancing will be enabled if running on a NUMA + machine. + config BALANCE_NUMA bool "Memory placement aware NUMA scheduler" default n
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a59d869..4841f4f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { }; static void sched_feat_enable(int i) { }; #endif /* HAVE_JUMP_LABEL */ -static ssize_t -sched_feat_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int sched_feat_set(char *cmp) { - char buf[64]; - char *cmp; - int neg = 0; int i; - - if (cnt > 63) - cnt = 63; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - cmp = strstrip(buf); + int neg = 0; if (strncmp(cmp, "NO_", 3) == 0) { neg = 1;
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf, } } + return i; +} + +static ssize_t +sched_feat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + cmp = strstrip(buf); + + i = sched_feat_set(cmp); if (i == __SCHED_FEAT_NR) return -EINVAL;
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p) #endif /* CONFIG_BALANCE_NUMA */ } +#ifdef CONFIG_BALANCE_NUMA +void set_balancenuma_state(bool enabled) +{ + if (enabled) + sched_feat_set("NUMA"); + else + sched_feat_set("NO_NUMA"); +} +#endif /* CONFIG_BALANCE_NUMA */ + /* * fork()/clone()-time setup: */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c1be907..b4bc459 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; + if (!sched_feat_numa(NUMA)) + return; + /* FIXME: Allocate task-specific structure for placement policy here */ /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7cfd289..d402368 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) /* - * Apply the automatic NUMA scheduling policy + * Apply the automatic NUMA scheduling policy. Enabled automatically + * at runtime if running on a NUMA machine. Can be controlled via + * balancenuma= */ #ifdef CONFIG_BALANCE_NUMA -SCHED_FEAT(NUMA, true) +SCHED_FEAT(NUMA, false) #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fd20e28..56ad9bf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p) mutex_unlock(&p->mutex); } +#ifdef CONFIG_BALANCE_NUMA +static bool __initdata balancenuma_override; + +static void __init check_balancenuma_enable(void) +{ + bool balancenuma_default = false; + + if (IS_ENABLED(CONFIG_BALANCE_NUMA_DEFAULT_ENABLED)) + balancenuma_default = true; + + if (nr_node_ids > 1 && !balancenuma_override) { + printk(KERN_INFO "Enabling automatic NUMA balancing. " + "Configure with balancenuma= or sysctl"); + set_balancenuma_state(balancenuma_default); + } +} + +static int __init setup_balancenuma(char *str) +{ + int ret = 0; + if (!str) + goto out; + balancenuma_override = true; + + if (!strcmp(str, "enable")) { + set_balancenuma_state(true); + ret = 1; + } else if (!strcmp(str, "disable")) { + set_balancenuma_state(false); + ret = 1; + } +out: + if (!ret) + printk(KERN_WARNING "Unable to parse balancenuma=\n"); + + return ret; +} +__setup("balancenuma=", setup_balancenuma); +#else +static inline void __init check_balancenuma_enable(void) +{ +} +#endif /* CONFIG_BALANCE_NUMA */ + /* assumes fs == KERNEL_DS */ void __init numa_policy_init(void) {
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void) if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) printk("numa_policy_init: interleaving failed\n"); + + check_balancenuma_enable(); } /* Reset policy of current process to default */
--
1.7.9.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>