Thread (17 messages) 17 messages, 3 authors, 2020-12-14

Re: [PATCH net-next] tun: fix ubuf refcount incorrectly on error path

From: Jason Wang <jasowang@redhat.com>
Date: 2020-12-07 03:56:13
Also in: virtualization

On 2020/12/4 下午6:22, wangyunjian wrote:
quoted hunk ↗ jump to hunk
quoted
-----Original Message-----
From: Jason Wang [mailto:jasowang@redhat.com]
Sent: Friday, December 4, 2020 2:11 PM
To: wangyunjian <redacted>; mst@redhat.com
Cc: virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Lilijun
(Jerry) [off-list ref]; xudingke [off-list ref]
Subject: Re: [PATCH net-next] tun: fix ubuf refcount incorrectly on error path


On 2020/12/3 下午4:00, wangyunjian wrote:
quoted
From: Yunjian Wang <redacted>

After setting callback for ubuf_info of skb, the callback
(vhost_net_zerocopy_callback) will be called to decrease the refcount
when freeing skb. But when an exception occurs afterwards, the error
handling in vhost handle_tx() will try to decrease the same refcount
again. This is wrong and fix this by clearing ubuf_info when meeting
errors.

Fixes: 4477138fa0ae ("tun: properly test for IFF_UP")
Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP
driver")

Signed-off-by: Yunjian Wang <redacted>
---
   drivers/net/tun.c | 11 +++++++++++
   1 file changed, 11 insertions(+)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index
2dc1988a8973..3614bb1b6d35 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1861,6 +1861,12 @@ static ssize_t tun_get_user(struct tun_struct
*tun, struct tun_file *tfile,
quoted
   	if (unlikely(!(tun->dev->flags & IFF_UP))) {
   		err = -EIO;
   		rcu_read_unlock();
+		if (zerocopy) {
+			skb_shinfo(skb)->destructor_arg = NULL;
+			skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+			skb_shinfo(skb)->tx_flags &= ~SKBTX_SHARED_FRAG;
+		}
+
   		goto drop;
   	}
@@ -1874,6 +1880,11 @@ static ssize_t tun_get_user(struct tun_struct
*tun, struct tun_file *tfile,

   		if (unlikely(headlen > skb_headlen(skb))) {
   			atomic_long_inc(&tun->dev->rx_dropped);
+			if (zerocopy) {
+				skb_shinfo(skb)->destructor_arg = NULL;
+				skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+				skb_shinfo(skb)->tx_flags &= ~SKBTX_SHARED_FRAG;
+			}
   			napi_free_frags(&tfile->napi);
   			rcu_read_unlock();
   			mutex_unlock(&tfile->napi_mutex);
It looks to me then we miss the failure feedback.

The issues comes from the inconsistent error handling in tun.

I wonder whether we can simply do uarg->callback(uarg, false) if necessary on
every failture path on tun_get_user().
How about this?

---
  drivers/net/tun.c | 29 ++++++++++++++++++-----------
  1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2dc1988a8973..36a8d8eacd7b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1637,6 +1637,19 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
  	return NULL;
  }
  
+/* copy ubuf_info for callback when skb has no error */
+inline static tun_copy_ubuf_info(struct sk_buff *skb, bool zerocopy, void *msg_control)
+{
+	if (zerocopy) {
+		skb_shinfo(skb)->destructor_arg = msg_control;
+		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+	} else if (msg_control) {
+		struct ubuf_info *uarg = msg_control;
+		uarg->callback(uarg, false);
+	}
+}
+
  /* Get packet from user space buffer */
  static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  			    void *msg_control, struct iov_iter *from,
@@ -1812,16 +1825,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  		break;
  	}
  
-	/* copy skb_ubuf_info for callback when skb has no error */
-	if (zerocopy) {
-		skb_shinfo(skb)->destructor_arg = msg_control;
-		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
-		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-	} else if (msg_control) {
-		struct ubuf_info *uarg = msg_control;
-		uarg->callback(uarg, false);
-	}
-
  	skb_reset_network_header(skb);
  	skb_probe_transport_header(skb);
  	skb_record_rx_queue(skb, tfile->queue_index);
@@ -1830,6 +1833,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  		struct bpf_prog *xdp_prog;
  		int ret;
  
+		tun_copy_ubuf_info(skb, zerocopy, msg_control);

If you think disabling zerocopy for XDP (which I think it makes sense). 
It's better to do this in another patch.

quoted hunk ↗ jump to hunk
  		local_bh_disable();
  		rcu_read_lock();
  		xdp_prog = rcu_dereference(tun->xdp_prog);
@@ -1880,7 +1884,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  			WARN_ON(1);
  			return -ENOMEM;
  		}
-
+		tun_copy_ubuf_info(skb, zerocopy, msg_control);

And for NAPI frags.

quoted hunk ↗ jump to hunk
  		local_bh_disable();
  		napi_gro_frags(&tfile->napi);
  		local_bh_enable();
@@ -1889,6 +1893,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  		struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
  		int queue_len;
  
+		tun_copy_ubuf_info(skb, zerocopy, msg_control);
  		spin_lock_bh(&queue->lock);
  		__skb_queue_tail(queue, skb);
  		queue_len = skb_queue_len(queue);
@@ -1899,8 +1904,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  
  		local_bh_enable();
  	} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
+		tun_copy_ubuf_info(skb, zerocopy, msg_control);
  		tun_rx_batched(tun, tfile, skb, more);
  	} else {
+		tun_copy_ubuf_info(skb, zerocopy, msg_control);
  		netif_rx_ni(skb);
  	}
  	rcu_read_unlock();

So it looks to me you want to disable zerocopy in all of the possible 
datapath?

Thanks
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help