Re: [PATCH net-next v15 03/14] netdev: support binding dma-buf to netdevice
From: Jakub Kicinski <kuba@kernel.org>
Date: 2024-07-03 01:09:12
Also in:
bpf, dri-devel, linux-alpha, linux-arch, linux-doc, linux-kselftest, linux-media, linux-mips, linux-trace-kernel, lkml, sparclinux
On Fri, 28 Jun 2024 00:32:40 +0000 Mina Almasry wrote:
+/* Protected by rtnl_lock() */
+static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int rxq_idx;
+
+ if (!binding)
+ return;nit: I don't see how it can happen, no defensive programming, please
+ if (binding->list.next)
+ list_del(&binding->list);
+
+ xa_for_each(&binding->bound_rxq_list, xa_idx, rxq) {nit: s/bound_rxq_list/bound_rxqs/ ? it's not a list
+ if (rxq->mp_params.mp_priv == binding) {
+ /* We hold the rtnl_lock while binding/unbinding
+ * dma-buf, so we can't race with another thread that
+ * is also modifying this value. However, the page_pool
+ * may read this config while it's creating its
+ * rx-queues. WRITE_ONCE() here to match the
+ * READ_ONCE() in the page_pool.
+ */
+ WRITE_ONCE(rxq->mp_params.mp_priv, NULL);Is this really sufficient in terms of locking? @binding is not RCU-protected and neither is the reader guaranteed to be in an RCU critical section. Actually the "reader" tries to take a ref and use this struct so it's not even a pure reader. Let's add a lock or use one of the existing locks Or, perhaps time to add a mutex to struct net_device
+ rxq_idx = get_netdev_rx_queue_index(rxq);
+
+ netdev_rx_queue_restart(binding->dev, rxq_idx);
+ }
+ }
+
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding)
+{
+ struct netdev_rx_queue *rxq;
+ u32 xa_idx;
+ int err;
+
+ if (rxq_idx >= dev->num_rx_queues)
+ return -ERANGE;
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_priv)
+ return -EEXIST;Makes me wonder - do we need an API to unbind or we assume application will only have one binding per socket and close it every time? I guess that's fine for future extension.
+ err = xa_alloc(&binding->bound_rxq_list, &xa_idx, rxq, xa_limit_32b,
+ GFP_KERNEL);
+ if (err)
+ return err;
+
+ /* We hold the rtnl_lock while binding/unbinding dma-buf, so we can't
+ * race with another thread that is also modifying this value. However,
+ * the driver may read this config while it's creating its * rx-queues.
+ * WRITE_ONCE() here to match the READ_ONCE() in the driver.
+ */
+ WRITE_ONCE(rxq->mp_params.mp_priv, binding);
+
+ err = netdev_rx_queue_restart(dev, rxq_idx);
+ if (err)
+ goto err_xa_erase;
+
+ return 0;
+
+err_xa_erase:
+ WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
+ xa_erase(&binding->bound_rxq_list, xa_idx);
+
+ return err;
+}
+
+int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct net_devmem_dmabuf_binding **out)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ static u32 id_alloc_next;
+ struct scatterlist *sg;
+ struct dma_buf *dmabuf;
+ unsigned int sg_idx, i;
+ unsigned long virtual;
+ int err;
+
+ dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return -EBADFD;
nit: I think error pointers are nicer than **out parameters :(
you can ERR_CAST() all the DMABUF errors
+ binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!binding) {
+ err = -ENOMEM;
+ goto err_put_dmabuf;
+ }
+
+ binding->dev = dev;
+
+ err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
+ binding, xa_limit_32b, &id_alloc_next,
+ GFP_KERNEL);
+ if (err < 0)
+ goto err_free_binding;
+
+ xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC);
+
+ refcount_set(&binding->ref, 1);
+
+ binding->dmabuf = dmabuf;
+
+ binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
+ if (IS_ERR(binding->attachment)) {
+ err = PTR_ERR(binding->attachment);
+ goto err_free_id;
+ }-/* Stub */
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
{
- return 0;
+ struct nlattr *tb[ARRAY_SIZE(netdev_queue_dmabuf_nl_policy)];
+ struct net_devmem_dmabuf_binding *out_binding;
+ struct list_head *sock_binding_list;
+ u32 ifindex, dmabuf_fd, rxq_idx;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ struct nlattr *attr;
+ int rem, err = 0;
+ void *hdr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_DMABUF_FD) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_QUEUES))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+ dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_BIND_DMABUF_DMABUF_FD]);
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (!netdev) {|| !netif_device_present(netdev)
+ err = -ENODEV;
+ goto err_unlock;
+ }
+
+ err = net_devmem_bind_dmabuf(netdev, dmabuf_fd, &out_binding);
+ if (err)
+ goto err_unlock;
+
+ nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+
+ if (nla_type(attr) != NETDEV_A_BIND_DMABUF_QUEUES)
+ continue;nit: nla_for_each_attr_type()
+ err = nla_parse_nested(
+ tb, ARRAY_SIZE(netdev_queue_dmabuf_nl_policy) - 1, attr,
+ netdev_queue_dmabuf_nl_policy, info->extack);
+ if (err < 0)
+ goto err_unbind;
+
+ rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_DMABUF_IDX]);
+
+ err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx,
+ out_binding);
+ if (err)
+ goto err_unbind;
+ }
+
+ sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
+ NETLINK_CB(skb).sk);
+ if (IS_ERR(sock_binding_list)) {
+ err = PTR_ERR(sock_binding_list);
+ goto err_unbind;
+ }
+
+ list_add(&out_binding->list, sock_binding_list);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp) {
+ err = -ENOMEM;
+ goto err_unbind;
+ }
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_genlmsg_free;
+ }I'd move genl_sk_priv_get(), genlmsg_new() and genlmsg_iput() before we take rtnl_lock(), but I admit it's a bit late for this sort of feedback.. :)
+ nla_put_u32(rsp, NETDEV_A_BIND_DMABUF_DMABUF_ID, out_binding->id); + genlmsg_end(rsp, hdr); + + rtnl_unlock(); + + return genlmsg_reply(rsp, info); + +err_genlmsg_free: + nlmsg_free(rsp); +err_unbind: + net_devmem_unbind_dmabuf(out_binding); +err_unlock: + rtnl_unlock(); + return err; }