Thread (35 messages), 6 authors, 2005-07-17

Re: [NUMA] Display and modify the memory policy of a process through /proc/<pid>/numa_policy

From: Christoph Lameter <hidden>
Date: 2005-07-17 03:21:51

On Sat, 16 Jul 2005, Paul Jackson wrote:
> On the other hand, I hear him saying we can't do it, because the
> locking cannot be safely handled.
Here is one approach to locking using xchg. This is restricted only to the 
policy fields on task_struct and vm_area_struct. One could also 
synchronize by taking the alloc_lock in task_struct. I did not use xchg
during the population of vm_area_struct and task_struct and also not 
during the destruction of these structures.

There may be additional races that need to be dealt with, depending on
when the task_struct and vm_area_struct become visible through the /proc
filesystem. However, those would be general races that also affect the use
of other fields through the /proc filesystem.

Signed-off-by: Christoph Lameter <redacted>

Index: linux-2.6.13-rc3/mm/mempolicy.c
===================================================================
--- linux-2.6.13-rc3.orig/mm/mempolicy.c	2005-07-16 20:07:04.000000000 -0700
+++ linux-2.6.13-rc3/mm/mempolicy.c	2005-07-16 20:07:06.000000000 -0700
@@ -349,7 +349,7 @@ check_range(struct mm_struct *mm, unsign
 static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
 {
 	int err = 0;
-	struct mempolicy *old = vma->vm_policy;
+	struct mempolicy *old;
 
 	PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
 		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
@@ -360,7 +360,7 @@ static int policy_vma(struct vm_area_str
 		err = vma->vm_ops->set_policy(vma, new);
 	if (!err) {
 		mpol_get(new);
-		vma->vm_policy = new;
+		old = xchg(&vma->vm_policy, new);
 		mpol_free(old);
 	}
 	return err;
@@ -451,8 +451,7 @@ asmlinkage long sys_set_mempolicy(int mo
 	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
-	mpol_free(current->mempolicy);
-	current->mempolicy = new;
+	mpol_free(xchg(&current->mempolicy, new));
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = find_first_bit(new->v.nodes, MAX_NUMNODES);
 	return 0;
Index: linux-2.6.13-rc3/kernel/exit.c
===================================================================
--- linux-2.6.13-rc3.orig/kernel/exit.c	2005-07-12 21:46:46.000000000 -0700
+++ linux-2.6.13-rc3/kernel/exit.c	2005-07-16 20:07:06.000000000 -0700
@@ -851,8 +851,7 @@ fastcall NORET_TYPE void do_exit(long co
 	tsk->exit_code = code;
 	exit_notify(tsk);
 #ifdef CONFIG_NUMA
-	mpol_free(tsk->mempolicy);
-	tsk->mempolicy = NULL;
+	mpol_free(xchg(&tsk->mempolicy, NULL));
 #endif
 
 	BUG_ON(!(current->flags & PF_DEAD));
Index: linux-2.6.13-rc3/include/linux/mm.h
===================================================================
--- linux-2.6.13-rc3.orig/include/linux/mm.h	2005-07-12 21:46:46.000000000 -0700
+++ linux-2.6.13-rc3/include/linux/mm.h	2005-07-16 20:07:06.000000000 -0700
@@ -107,7 +107,9 @@ struct vm_area_struct {
 	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
 #endif
 #ifdef CONFIG_NUMA
-	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
+	struct mempolicy *vm_policy;	/* NUMA policy for the VMA, may be updated only
+					 * with xchg or cmpxchg
+					 */
 #endif
 };
 
Index: linux-2.6.13-rc3/include/linux/sched.h
===================================================================
--- linux-2.6.13-rc3.orig/include/linux/sched.h	2005-07-16 19:54:14.000000000 -0700
+++ linux-2.6.13-rc3/include/linux/sched.h	2005-07-16 20:07:06.000000000 -0700
@@ -761,7 +761,10 @@ struct task_struct {
 	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
 #endif
 #ifdef CONFIG_NUMA
-  	struct mempolicy *mempolicy;
+  	struct mempolicy *mempolicy;	/* Only update via xchg or cmpxchg because mempolicy
+					 * may be changed from outside of the process
+					 * context
+					 */
 	short il_next;
 #endif
 #ifdef CONFIG_CPUSETS
Index: linux-2.6.13-rc3/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.13-rc3.orig/fs/proc/task_mmu.c	2005-07-16 20:07:04.000000000 -0700
+++ linux-2.6.13-rc3/fs/proc/task_mmu.c	2005-07-16 20:08:49.000000000 -0700
@@ -357,7 +357,7 @@ static ssize_t numa_policy_write(struct 
 {
 	struct task_struct *task = proc_task(file->f_dentry->d_inode);
 	char buffer[MAX_MEMPOL_STRING_SIZE], *end;
-	struct mempolicy *pol, *old_policy;
+	struct mempolicy *pol;
 
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
@@ -373,17 +373,10 @@ static ssize_t numa_policy_write(struct 
 	if (*end == '\n')
 		end++;
 
-	old_policy = task->mempolicy;
+	if (pol->policy == MPOL_DEFAULT)
+		pol = NULL;
 
-
-	if (!mpol_equal(pol, old_policy)) {
-		if (pol->policy == MPOL_DEFAULT)
-			pol = NULL;
-
-		task->mempolicy = pol;
-		mpol_free(old_policy);
-	} else
-		mpol_free(pol);
+	mpol_free(xchg(&task->mempolicy, pol));
 
 	return end - buffer;
 }
@@ -402,7 +395,7 @@ static ssize_t numa_vma_policy_write(str
 	unsigned long addr;
 	char buffer[MAX_MEMPOL_STRING_SIZE];
 	char *p, *end;
-	struct mempolicy *pol, *old_policy;
+	struct mempolicy *pol;
 
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
@@ -426,16 +419,10 @@ static ssize_t numa_vma_policy_write(str
 	if (*end == '\n')
 		end++;
 
-	old_policy = vma->vm_policy;
+	if (pol->policy == MPOL_DEFAULT)
+		pol = NULL;
 
-	if (!mpol_equal(pol, old_policy)) {
-		if (pol->policy == MPOL_DEFAULT)
-			pol = NULL;
-
-		vma->vm_policy = pol;
-		mpol_free(old_policy);
-	} else
-		mpol_free(pol);
+	mpol_free(xchg(&vma->vm_policy, pol));
 
 	return end - buffer;
 }
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help