[PATCH net-next-2.6] net: add rcu protection to netdev->ifalias
From: Eric Dumazet <hidden>
Date: 2011-05-17 22:18:16
Subsystem:
networking drivers, networking [general], the rest · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds
Le lundi 02 mai 2011 à 15:27 -0700, David Miller a écrit :
From: Stephen Hemminger <redacted> Date: Thu, 28 Apr 2011 08:43:37 -0700quoted
On Thu, 28 Apr 2011 10:56:07 +0200 Eric Dumazet [off-list ref] wrote:quoted
Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits : 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks This let writers fight for rtnl mutex and readers going full speed. It also takes care of phonet : phonet_route_get() is now called from rcu read section. I renamed it to phonet_route_get_rcu() Signed-off-by: Eric Dumazet <redacted> Cc: Patrick McHardy <redacted> Cc: Remi Denis-Courmont <redacted>Acked-by: Stephen Hemminger <redacted>Applied, thanks everyone!
I think we probably have to make some fixes, because rtnl_fill_ifinfo() can access fields that were previously protected with RTNL Let's see if it's doable, before considering re-adding rtnl_lock() in rtnl_dump_ifinfo() ? First step : dev->ifalias Its late here, I'll continue the work tomorrow. Thanks [PATCH net-next-2.6] net: add rcu protection to netdev->ifalias Avoid holding RTNL in show_ifalias() and rtnl_fill_ifinfo() to manipulate netdev->ifalias Signed-off-by: Eric Dumazet <redacted> --- include/linux/netdevice.h | 7 ++++++- net/core/dev.c | 28 ++++++++++++++++------------ net/core/net-sysfs.c | 13 +++++++------ net/core/rtnetlink.c | 8 ++++++-- 4 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a134d80..09b3df4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h@@ -980,6 +980,11 @@ struct net_device_ops { u32 features); }; +struct ifalias { + struct rcu_head rcu; + char name[0]; +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O
@@ -1004,7 +1009,7 @@ struct net_device { /* device name hash chain */ struct hlist_node name_hlist; /* snmp alias */ - char *ifalias; + struct ifalias __rcu *ifalias; /* * I/O specific fields
diff --git a/net/core/dev.c b/net/core/dev.c
index 155de20..01e3be0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c@@ -1035,6 +1035,12 @@ rollback: return err; } + +static void ifalias_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ifalias, rcu)); +} + /** * dev_set_alias - change ifalias of a device * @dev: device
@@ -1045,24 +1051,22 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + struct ifalias *ifalias, *new = NULL; ASSERT_RTNL(); if (len >= IFALIASZ) return -EINVAL; - if (!len) { - if (dev->ifalias) { - kfree(dev->ifalias); - dev->ifalias = NULL; - } - return 0; + ifalias = rtnl_dereference(dev->ifalias); + if (len) { + new = kmalloc(sizeof(*new) + len + 1, GFP_KERNEL); + if (!new) + return -ENOMEM; + strlcpy(new->name, alias, len + 1); } - - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) - return -ENOMEM; - - strlcpy(dev->ifalias, alias, len+1); + if (ifalias) + call_rcu(&ifalias->rcu, ifalias_kfree_rcu); + rcu_assign_pointer(dev->ifalias, new); return len; }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1b12217..e2acf15 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c@@ -281,13 +281,14 @@ static ssize_t show_ifalias(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); + struct ifalias *ifalias; ssize_t ret = 0; - if (!rtnl_trylock()) - return restart_syscall(); - if (netdev->ifalias) - ret = sprintf(buf, "%s\n", netdev->ifalias); - rtnl_unlock(); + rcu_read_lock(); + ifalias = rcu_dereference(netdev->ifalias); + if (ifalias) + ret = sprintf(buf, "%s\n", ifalias->name); + rcu_read_unlock(); return ret; }
@@ -1265,7 +1266,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); - kfree(dev->ifalias); + kfree(rcu_dereference_protected(dev->ifalias, 1)); kfree((char *)dev - dev->padded); }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2ba259..c8f0a11 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c@@ -849,6 +849,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_stats64 *stats; struct nlattr *attr, *af_spec; struct rtnl_af_ops *af_ops; + struct ifalias *ifalias; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL)
@@ -879,8 +880,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->qdisc) NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id); - if (dev->ifalias) - NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + rcu_read_lock(); + ifalias = rcu_dereference(dev->ifalias); + if (ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, ifalias->name); + rcu_read_unlock(); if (1) { struct rtnl_link_ifmap map = {