Thread (15 messages) 15 messages, 4 authors, 2023-09-01

Re: [PATCH v2] skbuff: skb_segment, Call zero copy functions before using skbuff frags

From: Eric Dumazet <edumazet@google.com>
Date: 2023-08-31 06:59:56
Also in: bpf, lkml, stable

On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
[off-list ref] wrote:
quoted hunk ↗ jump to hunk
Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
once per nskb") added the call to zero copy functions in skb_segment().
The change introduced a bug in skb_segment() because skb_orphan_frags()
may possibly change the number of fragments or allocate new fragments
altogether leaving nrfrags and frag to point to the old values. This can
cause a panic with stacktrace like the one below.

[  193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
[  193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G           O      5.15.123+ #26
[  193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
[  194.021892] Call Trace:
[  194.027422]  <TASK>
[  194.072861]  tcp_gso_segment+0x107/0x540
[  194.082031]  inet_gso_segment+0x15c/0x3d0
[  194.090783]  skb_mac_gso_segment+0x9f/0x110
[  194.095016]  __skb_gso_segment+0xc1/0x190
[  194.103131]  netem_enqueue+0x290/0xb10 [sch_netem]
[  194.107071]  dev_qdisc_enqueue+0x16/0x70
[  194.110884]  __dev_queue_xmit+0x63b/0xb30
[  194.121670]  bond_start_xmit+0x159/0x380 [bonding]
[  194.128506]  dev_hard_start_xmit+0xc3/0x1e0
[  194.131787]  __dev_queue_xmit+0x8a0/0xb30
[  194.138225]  macvlan_start_xmit+0x4f/0x100 [macvlan]
[  194.141477]  dev_hard_start_xmit+0xc3/0x1e0
[  194.144622]  sch_direct_xmit+0xe3/0x280
[  194.147748]  __dev_queue_xmit+0x54a/0xb30
[  194.154131]  tap_get_user+0x2a8/0x9c0 [tap]
[  194.157358]  tap_sendmsg+0x52/0x8e0 [tap]
[  194.167049]  handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
[  194.173631]  handle_tx+0xcd/0xe0 [vhost_net]
[  194.176959]  vhost_worker+0x76/0xb0 [vhost]
[  194.183667]  kthread+0x118/0x140
[  194.190358]  ret_from_fork+0x1f/0x30
[  194.193670]  </TASK>

In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
local variable in skb_segment() stale. This resulted in the code hitting
i >= nrfrags prematurely and trying to move to next frag_skb using
list_skb pointer, which was NULL, and caused kernel panic. Move the call
to zero copy functions before using frags and nr_frags.

Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
Signed-off-by: Mohamed Khalfella <redacted>
Reported-by: Amit Goyal <redacted>
Cc: stable@vger.kernel.org
---
 net/core/skbuff.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..18a33dc2d6af 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
        struct sk_buff *segs = NULL;
        struct sk_buff *tail = NULL;
        struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
        unsigned int mss = skb_shinfo(head_skb)->gso_size;
        unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
-       struct sk_buff *frag_skb = head_skb;
        unsigned int offset = doffset;
        unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
        unsigned int partial_segs = 0;
        unsigned int headroom;
        unsigned int len = head_skb->len;
+       struct sk_buff *frag_skb;
+       skb_frag_t *frag;
        __be16 proto;
        bool csum, sg;
-       int nfrags = skb_shinfo(head_skb)->nr_frags;
        int err = -ENOMEM;
        int i = 0;
-       int pos;
+       int nfrags, pos;

        if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
            mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
        headroom = skb_headroom(head_skb);
        pos = skb_headlen(head_skb);

+       if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+               return ERR_PTR(-ENOMEM);
+
+       nfrags = skb_shinfo(head_skb)->nr_frags;
+       frag = skb_shinfo(head_skb)->frags;
+       frag_skb = head_skb;
+
        do {
                struct sk_buff *nskb;
                skb_frag_t *nskb_frag;
@@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
                    (skb_headlen(list_skb) == len || sg)) {
                        BUG_ON(skb_headlen(list_skb) > len);

+                       nskb = skb_clone(list_skb, GFP_ATOMIC);
+                       if (unlikely(!nskb))
+                               goto err;
+
This patch is quite complex to review, so I am asking if this part was
really needed ?
<1>  : You moved here <2> and <3>

If this is not strictly needed, please keep the code as is to ease
code review...
quoted hunk ↗ jump to hunk
                        i = 0;
                        nfrags = skb_shinfo(list_skb)->nr_frags;
                        frag = skb_shinfo(list_skb)->frags;
@@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
                                frag++;
                        }

-                       nskb = skb_clone(list_skb, GFP_ATOMIC);
<2>
                        list_skb = list_skb->next;

-                       if (unlikely(!nskb))
-                               goto err;
-
<3>
quoted hunk ↗ jump to hunk
                        if (unlikely(pskb_trim(nskb, len))) {
                                kfree_skb(nskb);
                                goto err;
@@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
                skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
                                           SKBFL_SHARED_FRAG;

-               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+               if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
Why using list_skb here instead of frag_skb ?
Again, I have to look at the whole thing to understand why you did this.
                        goto err;

                while (pos < offset + len) {
                        if (i >= nfrags) {
+                               if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+                                   skb_zerocopy_clone(nskb, list_skb,
+                                                      GFP_ATOMIC))
+                                       goto err;
+
This part is fine.
quoted hunk ↗ jump to hunk
                                i = 0;
                                nfrags = skb_shinfo(list_skb)->nr_frags;
                                frag = skb_shinfo(list_skb)->frags;
@@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
                                        i--;
                                        frag--;
                                }
-                               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                                   skb_zerocopy_clone(nskb, frag_skb,
-                                                      GFP_ATOMIC))
-                                       goto err;

                                list_skb = list_skb->next;
                        }
--
2.17.1
Thanks.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help