Re: [RFC PATCH v4] cgroup: net_cls: traffic counter based on classification control cgroup
From: Eric Dumazet <hidden>
Date: 2013-01-18 14:27:20
Also in:
cgroups
On Fri, 2013-01-18 at 17:59 +0400, Alexey Perevalov wrote:
Traffic counter based on classification control group The main goal of this patch it's counting traffic for process placed to net_cls cgroup (ingress and egress). It's based on atomic and holds counter per network interfaces. It handles packets in net/core/dev.c for egress and in /net/ipv4/tcp.c|udp.c for ingress. Cgroup fs interface provides following files additional to existing net_cls files: net_cls.ifacename.usage_in_bytes Containing rcv/snd lines. Signed-off-by: Alexey Perevalov <redacted> ---
...
quoted hunk ↗ jump to hunk
index d1e8116..ffc9ec2 100644--- a/net/core/dev.c +++ b/net/core/dev.c@@ -135,6 +135,7 @@ #include <linux/net_tstamp.h> #include <linux/static_key.h> #include <net/flow_keys.h> +#include <net/cls_cgroup.h> #include "net-sysfs.h"@@ -2922,6 +2923,11 @@ int dev_queue_xmit(struct sk_buff *skb) */ rcu_read_lock_bh(); +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + if (dev)
How dev could be NULL here ?
+ count_cls_snd(skb_cls_classid(skb), skb->len, dev->name); +#endif
Why do you add these ugly ifdefs in the C files ?
As already said, you should not add new ifdef in C files, but properly
define empty functions in the header files
#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
...
#else
static inline void count_cls_snd( args ) { }
#endif
quoted hunk ↗ jump to hunk
+ skb_update_prio(skb); txq = netdev_pick_tx(dev, skb);diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ca2536..dc4dc3a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c@@ -276,6 +276,7 @@ #include <net/ip.h> #include <net/netdma.h> #include <net/sock.h> +#include <net/cls_cgroup.h> #include <asm/uaccess.h> #include <asm/ioctls.h>@@ -1464,6 +1465,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, u32 seq = tp->copied_seq; u32 offset; int copied = 0; +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + int ifindex = 0; +#endif if (sk->sk_state == TCP_LISTEN) return -ENOTCONN;@@ -1510,6 +1514,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, ++seq; break; } +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + ifindex = get_ifindex_from_skb(skb); +#endif sk_eat_skb(sk, skb, false); if (!desc->count) break;@@ -1520,8 +1527,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ - if (copied > 0) + if (copied > 0) { tcp_cleanup_rbuf(sk, copied); +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + count_cls_rcv(current, copied, ifindex); +#endif + } return copied; } EXPORT_SYMBOL(tcp_read_sock);@@ -1549,6 +1560,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool copied_early = false; struct sk_buff *skb; u32 urg_hole = 0; +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + int ifindex = 0; +#endif lock_sock(sk);@@ -1873,6 +1887,9 @@ skip_copy: if (tcp_hdr(skb)->fin) goto found_fin_ok; if (!(flags & MSG_PEEK)) { +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + ifindex = get_ifindex_from_skb(skb); +#endif sk_eat_skb(sk, skb, copied_early); copied_early = false; }@@ -1882,6 +1899,9 @@ skip_copy: /* Process the FIN. */ ++*seq; if (!(flags & MSG_PEEK)) { +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + ifindex = get_ifindex_from_skb(skb); +#endif sk_eat_skb(sk, skb, copied_early); copied_early = false; }@@ -1924,6 +1944,11 @@ skip_copy: /* Clean up data we have read: This will do ACK frames. */ tcp_cleanup_rbuf(sk, copied); +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER) + if (copied > 0) + count_cls_rcv(current, copied, ifindex); +#endif + release_sock(sk); return copied;
There is no need to add so much ugly code in tcp at least For input packets, as soon as we identify a matching socket in IP/TCP early demux, we are done. It permits to properly account packets, even the retransmits and ACKS, and even packets dropped by netfilter or sk_filter.