[Linux-kernel-mentees][PATCH 1/2] net: reorder members of virtnet_info for optimization
From: Anant Thazhemadam <hidden>
Date: 2020-09-30 05:17:51
Also in:
bpf, linux-kernel-mentees, lkml
Subsystem:
networking drivers, the rest, virtio net driver · Maintainers:
Andrew Lunn, "David S. Miller", Eric Dumazet, Jakub Kicinski, Paolo Abeni, Linus Torvalds, "Michael S. Tsirkin", Jason Wang
Analysis of the structure virtnet_info using pahole gives the
following stats.
/* size: 256, cachelines: 4, members: 25 */
/* sum members: 245, holes: 3, sum holes: 11 */
/* paddings: 1, sum paddings: 4 */
Reordering the members of virtnet_info packs the byte holes in the
middle of the structure, reduces the size of the structure by 8 bytes,
and also allows members to be stored without unnecessarily crossing
cacheline boundaries (for a cacheline size of 64 bytes).
Analysis using pahole post-reordering of members gives the following
stats.
/* size: 248, cachelines: 4, members: 25 */
/* padding: 3 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 56 bytes */
Signed-off-by: Anant Thazhemadam <redacted>
---
The complete analysis done by pahole can be found below.
Before the change:
struct virtnet_info {
struct virtio_device * vdev; /* 0 8 */
struct virtqueue * cvq; /* 8 8 */
struct net_device * dev; /* 16 8 */
struct send_queue * sq; /* 24 8 */
struct receive_queue * rq; /* 32 8 */
unsigned int status; /* 40 4 */
u16 max_queue_pairs; /* 44 2 */
u16 curr_queue_pairs; /* 46 2 */
u16 xdp_queue_pairs; /* 48 2 */
bool big_packets; /* 50 1 */
bool mergeable_rx_bufs; /* 51 1 */
bool has_cvq; /* 52 1 */
bool any_header_sg; /* 53 1 */
u8 hdr_len; /* 54 1 */
/* XXX 1 byte hole, try to pack */
struct delayed_work refill; /* 56 88 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
struct work_struct config_work; /* 144 32 */
bool affinity_hint_set; /* 176 1 */
/* XXX 7 bytes hole, try to pack */
struct hlist_node node; /* 184 16 */
/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
struct hlist_node node_dead; /* 200 16 */
struct control_buf * ctrl; /* 216 8 */
u8 duplex; /* 224 1 */
/* XXX 3 bytes hole, try to pack */
u32 speed; /* 228 4 */
long unsigned int guest_offloads; /* 232 8 */
long unsigned int guest_offloads_capable; /* 240 8 */
struct failover * failover; /* 248 8 */
/* size: 256, cachelines: 4, members: 25 */
/* sum members: 245, holes: 3, sum holes: 11 */
/* paddings: 1, sum paddings: 4 */
};
After the Change:
struct virtnet_info {
struct virtio_device * vdev; /* 0 8 */
struct virtqueue * cvq; /* 8 8 */
struct net_device * dev; /* 16 8 */
struct send_queue * sq; /* 24 8 */
struct receive_queue * rq; /* 32 8 */
unsigned int status; /* 40 4 */
u16 max_queue_pairs; /* 44 2 */
u16 curr_queue_pairs; /* 46 2 */
u16 xdp_queue_pairs; /* 48 2 */
bool big_packets; /* 50 1 */
bool mergeable_rx_bufs; /* 51 1 */
bool has_cvq; /* 52 1 */
bool any_header_sg; /* 53 1 */
bool affinity_hint_set; /* 54 1 */
u8 hdr_len; /* 55 1 */
struct control_buf * ctrl; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct work_struct config_work; /* 64 32 */
struct hlist_node node; /* 96 16 */
struct hlist_node node_dead; /* 112 16 */
/* --- cacheline 2 boundary (128 bytes) --- */
long unsigned int guest_offloads; /* 128 8 */
long unsigned int guest_offloads_capable; /* 136 8 */
struct failover * failover; /* 144 8 */
struct delayed_work refill; /* 152 88 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
u32 speed; /* 240 4 */
u8 duplex; /* 244 1 */
/* size: 248, cachelines: 4, members: 25 */
/* padding: 3 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 56 bytes */
};
It can be seen that the size has been reduced by 8 bytes, and the holes have been eliminated
as well. Also, more members of virtnet_info are accommodated within one cacheline
(without unnecessarily crossing over the cacheline boundary).
drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..32747f1980ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	/* Name of this receive queue: input.$index */
+	char name[40];
+
 	struct bpf_prog __rcu *xdp_prog;
 
 	struct virtnet_rq_stats stats;
 
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
 	/* Average packet length for mergeable receive buffers. */
 	struct ewma_pkt_len mrg_avg_pkt_len;
 
-	/* Page frag for packet buffer allocation. */
-	struct page_frag alloc_frag;
-
-	/* RX: fragments + linear part + virtio header */
-	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+	struct xdp_rxq_info xdp_rxq;
 
 	/* Min single buffer size for mergeable buffers case. */
 	unsigned int min_buf_len;
-
-	/* Name of this receive queue: input.$index */
-	char name[40];
-
-	struct xdp_rxq_info xdp_rxq;
 };
 
 /* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
 	/* Host can handle any s/g split between our header and packet data */
 	bool any_header_sg;
 
+	/* Does the affinity hint is set for virtqueues? */
+	bool affinity_hint_set;
+
 	/* Packet virtio header size */
 	u8 hdr_len;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
+	struct control_buf *ctrl;
 
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
-	/* Does the affinity hint is set for virtqueues? */
-	bool affinity_hint_set;
-
 	/* CPU hotplug instances for online & dead */
 	struct hlist_node node;
 	struct hlist_node node_dead;
 
-	struct control_buf *ctrl;
-
-	/* Ethtool settings */
-	u8 duplex;
-	u32 speed;
-
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Ethtool settings */
+	u32 speed;
+	u8 duplex;
 };
 
 struct padded_vnet_hdr {
--
2.25.1