Re: [PATCH] igmp: fix ip_mc_sf_allow race
From: Flavio Leitner <hidden>
Date: 2010-01-04 18:51:14
Subsystem:
networking [general], networking [ipv4/ipv6], the rest · Maintainers:
"David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, David Ahern, Ido Schimmel, Linus Torvalds
On Mon, Jan 04, 2010 at 02:07:03PM +0100, Eric Dumazet wrote:
Le 04/01/2010 12:29, Flavio Leitner a écrit :quoted
Then, I tried using call_rcu() to avoid the problem you are saying, but when you stop the reproducer, sk_free() will warn printing "optmem leakage.." because the rcu callback didn't run yet.This is probably because your call_rcu() callback was trying to call sock_kfree_s() ?
yes, correct.
rtnl_unlock();
call_rcu(&iml->lock, callback_func)
callback_func()
{
sock_kfree_s(sk, iml, sizeof(*iml));
}
Take a look at sock_kfree_s() definition :
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);
}
You can certainly try :
rtnl_unlock();
atomic_sub(sizeof(*iml), sk->sk_omem_alloc);
call_rcu(&iml->rcu, kfree);
(immediate sk_omem_alloc handling, but deferred kfree())Ok, below is the new version using call_rcu(). I'm still running my tests here, so I'm planning to resubmit it later if this version is okay with you. thanks!
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 724c27e..9cccd16 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h@@ -170,6 +170,7 @@ struct ip_mc_socklist { struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip_sf_list {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 76c0840..ed154db 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c@@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done:
@@ -1825,6 +1825,17 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } + +void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. */
@@ -1854,12 +1865,15 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); + rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev)
@@ -2209,30 +2223,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; i<psl->sl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /*
@@ -2245,13 +2269,17 @@ void ip_mc_drop_socket(struct sock *sk) struct ip_mc_socklist *iml; struct net *net = sock_net(sk); - if (inet->mc_list == NULL) + rcu_read_lock(); + if (rcu_dereference(inet->mc_list) == NULL) { + rcu_read_unlock(); return; + } + rcu_read_unlock(); rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rcu_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev);
@@ -2259,7 +2287,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); }
--
1.6.2.3