[RFC PATCH 09/11] ptq: Hook up transmit side of Per Queue Threads
From: Tom Herbert <hidden>
Date: 2020-06-24 17:19:50
Subsystem:
networking [general], networking [sockets], the rest · Maintainers:
"David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima, Willem de Bruijn, Linus Torvalds
Add support for selecting the device queue for transmit based on the per-thread transmit queue. Patch includes: - Add a global queue (gqid) mapping to sock - Function sk_tx_gqid_to_dqid_get to convert the gqid in a sock to a device queue (dqid) - Function sock_record_tx_queue to record a queue in a socket taken from ptq_threads in struct task - Call sock_record_tx_queue from the af_inet send, listen, connect, and accept functions to populate the socket's gqid for steering - In netdev_pick_tx, first try to take the queue index from the socket using sk_tx_gqid_to_dqid_get --- include/net/sock.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 9 ++++--- net/ipv4/af_inet.c | 6 +++++ 3 files changed, 75 insertions(+), 3 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index acb76cfaae1b..5ec9d02e7ad0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h@@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_tx_gqid_mapping: global tx queue number for sending * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
@@ -225,6 +226,9 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; +#ifdef CONFIG_RPS + unsigned short skc_tx_gqid_mapping; +#endif #ifdef CONFIG_XPS unsigned short skc_rx_queue_mapping; #endif
@@ -353,6 +357,9 @@ struct sock { #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping +#ifdef CONFIG_RPS +#define sk_tx_gqid_mapping __sk_common.skc_tx_gqid_mapping +#endif #ifdef CONFIG_XPS #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping #endif
@@ -1792,6 +1799,34 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, return __sk_receive_skb(sk, skb, nested, 1, true); } +static inline int sk_tx_gqid_get(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (sk && sk->sk_tx_gqid_mapping != NO_QUEUE) + return sk->sk_tx_gqid_mapping; +#endif + + return -1; +} + +static inline void sk_tx_gqid_set(struct sock *sk, int gqid) +{ +#ifdef CONFIG_RPS + /* sk_tx_gqid_mapping accepts only values up to RPS_MAX_QID (0x7ffe), or NO_QUEUE to mark it unset */ + if (WARN_ON_ONCE((unsigned int)gqid > RPS_MAX_QID && + gqid != NO_QUEUE)) + return; + sk->sk_tx_gqid_mapping = gqid; +#endif +} + +static inline void sk_tx_gqid_clear(struct sock *sk) +{ +#ifdef CONFIG_RPS + sk->sk_tx_gqid_mapping = NO_QUEUE; +#endif +} + static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { /* sk_tx_queue_mapping accept only upto a 16-bit value */
@@ -1803,6 +1838,9 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) static inline void sk_tx_queue_clear(struct sock *sk) { sk->sk_tx_queue_mapping = NO_QUEUE; + + /* The global queue id is cleared wherever the tx queue mapping is cleared */ + sk_tx_gqid_clear(sk); } static inline int sk_tx_queue_get(const struct sock *sk)
@@ -1813,6 +1851,31 @@ static inline int sk_tx_queue_get(const struct sock *sk) return -1; } +static inline int sk_tx_gqid_to_dqid_get(const struct net_device *dev, + const struct sock *sk) +{ + int ret = -1; +#ifdef CONFIG_RPS + int gqid; + u16 dqid; + + gqid = sk_tx_gqid_get(sk); + if (gqid >= 0) { + dqid = netdev_tx_gqid_to_dqid(dev, gqid); + if (dqid != NO_QUEUE) + ret = dqid; + } +#endif + return ret; +} + +static inline void sock_record_tx_queue(struct sock *sk) +{ +#ifdef CONFIG_PER_THREAD_QUEUES + sk_tx_gqid_set(sk, current->ptq_queues.txq_id); +#endif +} + static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_XPS
diff --git a/net/core/dev.c b/net/core/dev.c
index f64bf6608775..f4478c9b1c9c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c@@ -3982,10 +3982,13 @@ u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, if (queue_index < 0 || skb->ooo_okay || queue_index >= dev->real_num_tx_queues) { - int new_index = get_xps_queue(dev, sb_dev, skb); + int new_index = sk_tx_gqid_to_dqid_get(dev, sk); - if (new_index < 0) - new_index = skb_tx_hash(dev, sb_dev, skb); + if (new_index < 0) { + new_index = get_xps_queue(dev, sb_dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, sb_dev, skb); + } if (queue_index != new_index && sk && sk_fullsock(sk) &&
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 02aa5cb3a4fd..9b36aa3d1622 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c@@ -201,6 +201,8 @@ int inet_listen(struct socket *sock, int backlog) lock_sock(sk); + sock_record_tx_queue(sk); + err = -EINVAL; if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) goto out;
@@ -630,6 +632,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, } } + sock_record_tx_queue(sk); + switch (sock->state) { default: err = -EINVAL;
@@ -742,6 +746,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, lock_sock(sk2); sock_rps_record_flow(sk2); + sock_record_tx_queue(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
@@ -794,6 +799,7 @@ EXPORT_SYMBOL(inet_getname); int inet_send_prepare(struct sock *sk) { sock_rps_record_flow(sk); + sock_record_tx_queue(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
--
2.25.1