Re: [patch net-next] tc: introduce OpenFlow classifier
From: Hannes Frederic Sowa <hidden>
Date: 2015-03-26 14:23:59
On Do, 2015-03-26 at 13:53 +0100, Jiri Pirko wrote:
quoted hunk ↗ jump to hunk
This patch introduces OpenFlow-based filter. So far, the very essential packet fields are supported (according to OpenFlow v1.4 spec). This patch is only the first step. There is a lot of potential performance improvements possible to implement. Also a lot of features are missing now. They will be addressed in follow-up patches. To the name of this classifier, I believe that "cls_openflow" is pretty accurate. It is actually a OpenFlow classifier. Signed-off-by: Jiri Pirko <jiri@resnulli.us> --- include/uapi/linux/pkt_cls.h | 31 ++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/cls_openflow.c | 681 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 724 insertions(+) create mode 100644 net/sched/cls_openflow.cdiff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bf08e76..910898c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h@@ -404,6 +404,37 @@ enum { #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* OpenFlow classifier */ + +enum { + TCA_OF_UNSPEC, + TCA_OF_CLASSID, + TCA_OF_POLICE, + TCA_OF_INDEV, + TCA_OF_ACT, + TCA_OF_KEY_ETH_DST, /* ETH_ALEN */ + TCA_OF_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_OF_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_OF_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_OF_KEY_ETH_TYPE, /* be16 */ + TCA_OF_KEY_IP_PROTO, /* u8 */ + TCA_OF_KEY_IPV4_SRC, /* be32 */ + TCA_OF_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_OF_KEY_IPV4_DST, /* be32 */ + TCA_OF_KEY_IPV4_DST_MASK, /* be32 */ + TCA_OF_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_OF_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_OF_KEY_IPV6_DST, /* struct in6_addr */ + TCA_OF_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_OF_KEY_TCP_SRC, /* be16 */ + TCA_OF_KEY_TCP_DST, /* be16 */ + TCA_OF_KEY_UDP_SRC, /* be16 */ + TCA_OF_KEY_UDP_DST, /* be16 */ + __TCA_OF_MAX, +}; + +#define TCA_OF_MAX (__TCA_OF_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr {diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2274e72..32d1a7b 100644 --- 
a/net/sched/Kconfig +++ b/net/sched/Kconfig@@ -477,6 +477,17 @@ config NET_CLS_BPF To compile this code as a module, choose M here: the module will be called cls_bpf. +config NET_CLS_OPENFLOW + tristate "OpenFlow classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys and masks according to + OpenFlow standard. + + To compile this code as a module, choose M here: the module will + be called cls_openflow. + config NET_EMATCH bool "Extended Matches" select NET_CLSdiff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca7f4c..5faa9ca 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o +obj-$(CONFIG_NET_CLS_OPENFLOW) += cls_openflow.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.odiff --git a/net/sched/cls_openflow.c b/net/sched/cls_openflow.c new file mode 100644 index 0000000..b59311f --- /dev/null +++ b/net/sched/cls_openflow.c@@ -0,0 +1,681 @@ +/* + * net/sched/cls_openflow.c OpenFlow classifier + * + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <linux/if_ether.h> +#include <linux/in6.h> +#include <linux/ip.h> + +#include <net/sch_generic.h> +#include <net/pkt_cls.h> +#include <net/ip.h> + +struct of_flow_key { + int indev_ifindex; + struct { + u8 src[ETH_ALEN]; + u8 dst[ETH_ALEN]; + __be16 type; + } eth; + struct { + u8 proto; + } ip; + union { + struct { + __be32 src; + __be32 dst; + } ipv4; + struct { + struct in6_addr src; + struct in6_addr dst; + } ipv6; + }; + union { + struct { + __be16 src; + __be16 dst; + } tp; + };
__u8 end[0];
u8 pad[DIV_ROUND_UP(offsetof(struct ..., end), sizeof(long))];
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
BITS_PER_LONG / 8 == sizeof(long)
+
+struct of_flow_match {
+ struct of_flow_key key;
+ struct of_flow_key mask;
+};
+
+struct cls_of_head {
+ struct list_head filters;
+ u32 hgen;
+ struct rcu_head rcu;
+};
+
+struct cls_of_filter {
+ struct list_head list;
+ u32 handle;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct tcf_proto *tp;
+ struct of_flow_match match;
+ struct rcu_head rcu;
+};
+
+static int __check_header(struct sk_buff *skb, int len)
+{
+ if (unlikely(skb->len < len))
+ return -EINVAL;
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+ return 0;
+}
+
+static int of_extract_ipv4(struct sk_buff *skb, struct of_flow_key *key)
+{
+ unsigned int iph_off = skb_network_offset(skb);
+ struct iphdr *iph;
+ unsigned int iph_len;
+ int err;
+
+ err = __check_header(skb, iph_off + sizeof(*iph));
+ if (unlikely(err))
+ goto errout;
+
+ iph_len = ip_hdrlen(skb);
+ if (unlikely(iph_len < sizeof(*iph) ||
+ skb->len < iph_off + iph_len)) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ iph = ip_hdr(skb);
+ key->ipv4.src = iph->saddr;
+ key->ipv4.dst = iph->daddr;
+ key->ip.proto = iph->protocol;
+
+ skb_set_transport_header(skb, iph_off + iph_len);
+ return 0;
+
+errout:
+ memset(&key->ip, 0, sizeof(key->ip));
+ memset(&key->ipv4, 0, sizeof(key->ipv4));
+ return err;
+}
+
+static int of_extract_ipv6(struct sk_buff *skb, struct of_flow_key *key)
+{
+ unsigned int iph_off = skb_network_offset(skb);
+ int payload_off;
+ struct ipv6hdr *iph;
+ uint8_t nexthdr;
+ __be16 frag_off;
+ int err;
+
+ err = __check_header(skb, iph_off + sizeof(*iph));
+ if (unlikely(err))
+ goto errout;
+
+ iph = ipv6_hdr(skb);
+ nexthdr = iph->nexthdr;
+ payload_off = (u8 *) (iph + 1) - skb->data;
+
+ key->ip.proto = NEXTHDR_NONE;
+ key->ipv6.src = iph->saddr;
+ key->ipv6.dst = iph->daddr;
+
+ payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr, &frag_off);
+ if (unlikely(payload_off < 0)) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ key->ip.proto = nexthdr;
+ skb_set_transport_header(skb, payload_off);
+ return 0;
+
+errout:
+ memset(&key->ip, 0, sizeof(key->ip));
+ memset(&key->ipv6, 0, sizeof(key->ipv6));
+ return err;
+}
+
+static bool __tcphdr_ok(struct sk_buff *skb)
+{
+ int tcph_off = skb_transport_offset(skb);
+ int tcph_len;
+
+ if (unlikely(!pskb_may_pull(skb, tcph_off + sizeof(struct tcphdr))))
+ return false;
+
+ tcph_len = tcp_hdrlen(skb);
+ if (unlikely(tcph_len < sizeof(struct tcphdr) ||
+ skb->len < tcph_off + tcph_len))
+ return false;
+
+ return true;
+}
+
+static bool __udphdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct udphdr));
+}
+
+static void of_extract_tp(struct sk_buff *skb, struct of_flow_key *key)
+{
+ if (key->ip.proto == IPPROTO_TCP) {
+ if (__tcphdr_ok(skb)) {
+ struct tcphdr *tcp = tcp_hdr(skb);
+
+ key->tp.src = tcp->source;
+ key->tp.dst = tcp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
+ }
+
+ } else if (key->ip.proto == IPPROTO_UDP) {
+ if (__udphdr_ok(skb)) {
+ struct udphdr *udp = udp_hdr(skb);
+
+ key->tp.src = udp->source;
+ key->tp.dst = udp->dest;
+ } else {
+ memset(&key->tp, 0, sizeof(key->tp));
+ }
+ }
+}
+
+static void of_extract_key(struct sk_buff *skb, struct of_flow_key *key)
+{
+ struct ethhdr *eth;
+ int err;
+
+ key->indev_ifindex = skb->skb_iif;
+
+ eth = eth_hdr(skb);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
+
+ key->eth.type = skb->protocol;
+ if (key->eth.type == htons(ETH_P_IP)) {
+ err = of_extract_ipv4(skb, key);
+ if (likely(!err))
+ of_extract_tp(skb, key);
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
+ err = of_extract_ipv6(skb, key);
+ if (likely(!err))
+ of_extract_tp(skb, key);
+ }
+}
+
+static bool of_match(struct of_flow_key *skb_key, struct cls_of_filter *f)
+{
+ const long *lkey = (const long *) &f->match.key;
+ const long *lmask = (const long *) &f->match.mask;
+ const long *lskb_key = (const long *) skb_key;
+ int i;
+
+ for (i = 0; i < sizeof(struct of_flow_key); i += sizeof(const long)) {
+ if ((*lkey++ & *lmask) != (*lskb_key++ & *lmask))
+ return false;
+ lmask++;
+ }
+ return true;
+}
+
+static int of_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_of_head *head = rcu_dereference_bh(tp->root);
+ struct cls_of_filter *f;
+ struct of_flow_key skb_key;
+ int ret;
+
+ of_extract_key(skb, &skb_key);
+
+ list_for_each_entry_rcu(f, &head->filters, list) {
+ if (!of_match(&skb_key, f))
+ continue;

This seems very limited to me, do you have plans to extend this?

Bye,
Hannes