Thread (54 messages) 54 messages, 5 authors, 2016-07-12

Re: [PATCH 07/13] pci: Provide sensible irq vector alloc/free routines

From: Alexander Gordeev <hidden>
Date: 2016-06-23 11:16:17
Also in: linux-nvme, linux-pci, lkml

On Tue, Jun 14, 2016 at 09:59:00PM +0200, Christoph Hellwig wrote:
Add a helper to allocate a range of interrupt vectors, which will
transparently use MSI-X and MSI if available or fallback to legacy
vectors.  The interrupts are available in a core managed array
in the pci_dev structure, and can also be released using a similar
helper.

The next patch will also add automatic spreading of MSI / MSI-X
vectors to this function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/msi.c   | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  18 +++++++++
New APIs should be documented in Documentation/PCI/MSI-HOWTO.txt, I guess.
quoted hunk ↗ jump to hunk
 2 files changed, 128 insertions(+)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a080f44..a33adec 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2003-2004 Intel
  * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+ * Copyright (c) 2016 Christoph Hellwig.
  */
 
 #include <linux/err.h>
@@ -1120,6 +1121,115 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
+static unsigned int pci_nr_irq_vectors(struct pci_dev *pdev)
+{
+	int nr_entries;
+
+	nr_entries = pci_msix_vec_count(pdev);
+	if (nr_entries <= 0 && pci_msi_supported(pdev, 1))
+		nr_entries = pci_msi_vec_count(pdev);
+	if (nr_entries <= 0)
+		nr_entries = 1;
+	return nr_entries;
+}
This function is strange, because it:
  (a) does not consider the PCI_IRQ_NOMSIX flag;
  (b) only calls pci_msi_supported() for the MSI case;
  (c) calls pci_msi_supported() with just one vector;
  (d) might return a suboptimal number of vectors (the MSI-X count being
      used later for MSI, or vice versa).

Overall, I would suggest simply returning the maximum of the MSI-X and MSI
vector counts and letting the rest of the code (i.e. the two range
functions) handle (a)-(d).
+static int pci_enable_msix_range_wrapper(struct pci_dev *pdev, u32 *irqs,
+		unsigned int min_vecs, unsigned int max_vecs)
+{
+	struct msix_entry *msix_entries;
+	int vecs, i;
+
+	msix_entries = kcalloc(max_vecs, sizeof(struct msix_entry), GFP_KERNEL);
+	if (!msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < max_vecs; i++)
+		msix_entries[i].entry = i;
+
+	vecs = pci_enable_msix_range(pdev, msix_entries, min_vecs, max_vecs);
+	if (vecs > 0) {
This condition check is unneeded: the loop below does not execute when vecs <= 0.
+		for (i = 0; i < vecs; i++)
+			irqs[i] = msix_entries[i].vector;
+	}
+
+	kfree(msix_entries);
+	return vecs;
+}
+
+/**
+ * pci_alloc_irq_vectors - allocate multiple IRQs for a device
+ * @dev:		PCI device to operate on
+ * @min_vecs:		minimum number of vectors required (must be >= 1)
+ * @max_vecs:		maximum (desired) number of vectors
+ * @flags:		flags or quirks for the allocation
+ *
+ * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
+ * vectors if available, and fall back to a single legacy vector
+ * if neither is available.  Return the number of vectors allocated,
+ * (which might be smaller than @max_vecs) if successful, or a negative
+ * error code on error.  The Linux irq numbers for the allocated
+ * vectors are stored in pdev->irqs.  If less than @min_vecs interrupt
+ * vectors are available for @dev the function will fail with -ENOSPC.
+ */
+int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+		unsigned int max_vecs, unsigned int flags)
+{
+	unsigned int vecs, i;
+	u32 *irqs;
+
+	max_vecs = min(max_vecs, pci_nr_irq_vectors(dev));
Optionally, you could move this assignment into pci_nr_irq_vectors() and
simply let it determine the number of vectors to request.
+	irqs = kcalloc(max_vecs, sizeof(u32), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	if (!(flags & PCI_IRQ_NOMSIX)) {
+		vecs = pci_enable_msix_range_wrapper(dev, irqs, min_vecs,
+				max_vecs);
+		if (vecs > 0)
+			goto done;
+	}
+
+	vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
+	if (vecs > 0) {
+		for (i = 0; i < vecs; i++)
+			irqs[i] = dev->irq + i;
+		goto done;
+	}
+
+	if (min_vecs > 1)
+		return -ENOSPC;
irqs is leaked if (min_vecs > 1)

You can get rid of this check altogether if you reorganize your code, e.g.
like this:

	...

	vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
	if (vecs < 0)
		goto legacy;

	for (i = 0; i < vecs; i++)
		irqs[i] = dev->irq + i;

done:
	...


legacy:
	...
+
+	/* use legacy irq */
+	kfree(irqs);
+	dev->irqs = &dev->irq;
+	return 1;
+
+done:
+	dev->irqs = irqs;
+	return vecs;
+}
+EXPORT_SYMBOL(pci_alloc_irq_vectors);
+
+/**
+ * pci_free_irq_vectors - free previously allocated IRQs for a device
+ * @dev:		PCI device to operate on
+ *
+ * Undoes the allocations and enabling in pci_alloc_irq_vectors().
+ */
+void pci_free_irq_vectors(struct pci_dev *dev)
+{
+	if (dev->msix_enabled)
+		pci_disable_msix(dev);
+	else if (dev->msi_enabled)
+		pci_disable_msi(dev);
The checks are probably redundant or incomplete. Redundant - because
pci_disable_msi()/pci_disable_msix() do it anyways:

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

Incomplete - because the other two conditions (pci_msi_enable and dev) are not checked.
+	if (dev->irqs != &dev->irq)
+		kfree(dev->irqs);
Unset dev->irqs?

BTW, since comparing dev->irqs against &dev->irq effectively tells whether
MSI/MSI-X was enabled, this function could bail out early if it was not.
quoted hunk ↗ jump to hunk
+}
+EXPORT_SYMBOL(pci_free_irq_vectors);
+
+
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
 {
 	return to_pci_dev(desc->dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b67e4df..84a20fc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -320,6 +320,7 @@ struct pci_dev {
 	 * directly, use the values stored here. They might be different!
 	 */
 	unsigned int	irq;
+	unsigned int	*irqs;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
 	bool match_driver;		/* Skip attaching driver */
@@ -1237,6 +1238,8 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
 
+#define PCI_IRQ_NOMSIX		(1 << 0) /* don't try to use MSI-X interrupts */
BTW, why PCI_IRQ_NOMSIX only and no PCI_IRQ_NOMSI?
quoted hunk ↗ jump to hunk
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
 #include <linux/pci-dma.h>
@@ -1284,6 +1287,9 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 		return rc;
 	return 0;
 }
+int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+		unsigned int max_vecs, unsigned int flags);
+void pci_free_irq_vectors(struct pci_dev *dev);
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
 static inline void pci_msi_shutdown(struct pci_dev *dev) { }
@@ -1307,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev *dev,
 static inline int pci_enable_msix_exact(struct pci_dev *dev,
 		      struct msix_entry *entries, int nvec)
 { return -ENOSYS; }
+static inline int pci_alloc_irq_vectors(struct pci_dev *dev,
+		unsigned int min_vecs, unsigned int max_vecs,
+		unsigned int flags)
+{
+	if (min_vecs > 1)
+		return -ENOSPC;
+	dev->irqs = &dev->irq;
+	return 1;
+}
+static inline void pci_free_irq_vectors(struct pci_dev *dev)
+{
Unset dev->irqs?
+}
 #endif
 
 #ifdef CONFIG_PCIEPORTBUS
-- 
2.1.4
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help