Re: [PATCH 2/2] PCI: hv: Support for Hyper-V vPCI for ARM64
From: Marc Zyngier <maz@kernel.org>
Date: 2021-09-13 19:02:50
Also in:
linux-hyperv, linux-pci
Sunil, On Mon, 13 Sep 2021 18:37:22 +0100, Sunil Muthuswamy [off-list ref] wrote:
This patch adds support for Hyper-V vPCI by adding a PCI MSI IRQ domain specific to Hyper-V that is based on SPIs. The IRQ domain parents itself to the arch GIC IRQ domain for basic vector management.
Given that we literally spent *weeks* discussing this, I would have appreciated it if you had Cc'd me directly as a basic courtesy, rather than leaving me to spot it on the list.
quoted hunk ↗ jump to hunk
Signed-off-by: Sunil Muthuswamy <redacted> --- arch/arm64/hyperv/Makefile | 2 +- arch/arm64/hyperv/hv_pci.c | 275 +++++++++++++++++++++++++++ arch/arm64/include/asm/hyperv-tlfs.h | 9 + arch/arm64/include/asm/mshyperv.h | 26 +++ drivers/pci/Kconfig | 2 +- drivers/pci/controller/Kconfig | 2 +- drivers/pci/controller/pci-hyperv.c | 5 + 7 files changed, 318 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/hyperv/hv_pci.cdiff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile index 87c31c001da9..af7a66e43ef4 100644 --- a/arch/arm64/hyperv/Makefile +++ b/arch/arm64/hyperv/Makefile@@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := hv_core.o mshyperv.o +obj-y := hv_core.o mshyperv.o hv_pci.odiff --git a/arch/arm64/hyperv/hv_pci.c b/arch/arm64/hyperv/hv_pci.c new file mode 100644 index 000000000000..06179e4a6a2d --- /dev/null +++ b/arch/arm64/hyperv/hv_pci.c
Nit: this is definitely the wrong location. There isn't anything arm64 specific here that warrants hiding it away. Like most other bizarre MSI implementations, it should either live in drivers/pci or in drivers/irqchip.
quoted hunk ↗ jump to hunk
@@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Architecture specific vector management for the Hyper-V vPCI. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : Sunil Muthuswamy <sunilmut@microsoft.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details.
What is the point of this if you have the SPDX tag?
+ */ + +#include <asm/mshyperv.h> +#include <linux/acpi.h> +#include <linux/irqdomain.h> +#include <linux/irq.h> +#include <acpi/acpi_bus.h> + +/* + * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit + * of room at the start to allow for SPIs to be specified through ACPI. + */ +#define HV_PCI_MSI_SPI_START 50
If that's the start, it has a good chance of being the wrong start. Given that the HyperV PCI controller advertises Multi-MSI support, INTID 50 cannot be used for any device that requires more than 2 vectors.
+#define HV_PCI_MSI_SPI_NR (1020 - HV_PCI_MSI_SPI_START)
+
+struct hv_pci_chip_data {
+ spinlock_t lock;
Why a spinlock? Either this can be used in interrupt context, and we require a raw_spinlock_t instead, or it never is used in interrupt context and should be a good old mutex.
+ DECLARE_BITMAP(bm, HV_PCI_MSI_SPI_NR);
+};
+
+/* Hyper-V vPCI MSI GIC IRQ domain */
+static struct irq_domain *hv_msi_gic_irq_domain;
+
+static struct irq_chip hv_msi_irq_chip = {
+ .name = "Hyper-V ARM64 PCI MSI",
That's a mouthful! How about "MSI" instead?
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent
+};
+
+/**
+ * Frees the specified number of interrupts.
+ * @domain: The IRQ domain
+ * @virq: The virtual IRQ number.
+ * @nr_irqs: Number of IRQ's to free.
+ */
+static void hv_pci_vec_irq_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct hv_pci_chip_data *chip_data = domain->host_data;
+ unsigned long flags;
+ unsigned int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ struct irq_data *irqd = irq_domain_get_irq_data(domain,
+ virq + i);
+
+ spin_lock_irqsave(&chip_data->lock, flags);
+ clear_bit(irqd->hwirq - HV_PCI_MSI_SPI_START, chip_data->bm);
+ spin_unlock_irqrestore(&chip_data->lock, flags);
Really? Why should you disable interrupts here? Why do you need to lock/unlock on each iteration of this loop?
+ irq_domain_reset_irq_data(irqd);
+ }
+
+ irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+/**
+ * Allocate an interrupt from the domain.
+ * @hwirq: Will be set to the allocated H/W IRQ.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_alloc_device_irq(struct irq_domain *domain,
+ unsigned int virq, irq_hw_number_t *hwirq)
+{
+ struct hv_pci_chip_data *chip_data = domain->host_data;
+ unsigned long flags;
+ unsigned int index;
+
+ spin_lock_irqsave(&chip_data->lock, flags);
+ index = find_first_zero_bit(chip_data->bm, HV_PCI_MSI_SPI_NR);
+ if (index == HV_PCI_MSI_SPI_NR) {
+ spin_unlock_irqrestore(&chip_data->lock, flags);
+ pr_err("No more free IRQ vector available\n");
No, we don't shout because we're out of MSIs. It happens, and drivers can nicely use fewer vectors if needed. But more importantly, this totally breaks MultiMSI, see below.
+ return -ENOSPC;
+ }
+
+ set_bit(index, chip_data->bm);
+ spin_unlock_irqrestore(&chip_data->lock, flags);
+ *hwirq = index + HV_PCI_MSI_SPI_START;
+
+ return 0;
+}
+
+/**
+ * Allocate an interrupt from the parent GIC domain.
+ * @domain: The IRQ domain.
+ * @virq: The virtual IRQ number.
+ * @hwirq: The H/W IRQ number that needs to be allocated.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ struct irq_fwspec fwspec;
+
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 2;
+ fwspec.param[0] = hwirq;
+ fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ return irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+}
+
+/**
+ * Allocate specified number of interrupts from the domain.
+ * @domain: The IRQ domain.
+ * @virq: The starting virtual IRQ number.
+ * @nr_irqs: Number of IRQ's to allocate.
+ * @args: The MSI alloc information.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs,
+ void *args)
+{
+ irq_hw_number_t hwirq;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = hv_pci_vec_alloc_device_irq(domain, virq, &hwirq);
+ if (ret)
+ goto free_irq;
+
+ ret = hv_pci_vec_irq_gic_domain_alloc(domain, virq + i, hwirq);
Please read the specification for PCI MultiMSI. You offer none of the alignment and contiguity guarantees that are required.
+ if (ret) + goto free_irq; + + ret = irq_domain_set_hwirq_and_chip(domain, virq + i, + hwirq, &hv_msi_irq_chip, + domain->host_data); + if (ret) + goto free_irq; + + irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i)));
Why? The GIC is responsible for the distribution, not the MSI layer. This looks completely bogus.
+ pr_debug("pID:%d vID:%u\n", (int)hwirq, virq + i);
+ }
+
+ return 0;
+
+free_irq:
+ if (i > 0)
+ hv_pci_vec_irq_domain_free(domain, virq, i - 1);
+
+ return ret;
+}
+
+/**
+ * Activate the interrupt.
+ * @domain: The IRQ domain.
+ * @irqd: IRQ data.
+ * @reserve: Indicates whether the IRQ's can be reserved.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_domain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+{
+ /* All available online CPUs are available for targeting */
+ irq_data_update_effective_affinity(irqd, cpu_online_mask);
Which completely contradicts what you have written above, and doesn't match what the GIC does either.
+ return 0;
+}
+
+static const struct irq_domain_ops hv_pci_domain_ops = {
+ .alloc = hv_pci_vec_irq_domain_alloc,
+ .free = hv_pci_vec_irq_domain_free,
+ .activate = hv_pci_vec_irq_domain_activate,
+};
+
+
+/**
+ * This routine performs the architecture specific initialization for vector
+ * domain to operate. It allocates an IRQ domain tree as a child of the GIC
+ * IRQ domain.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+int hv_pci_vector_init(void)
Why isn't this static?
+{
+ static struct hv_pci_chip_data *chip_data;
+ struct fwnode_handle *fn = NULL;
+ int ret = -ENOMEM;
+
+ chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
+ if (!chip_data)
+ return ret;
+
+ spin_lock_init(&chip_data->lock);
+ fn = irq_domain_alloc_named_fwnode("Hyper-V ARM64 vPCI");
+ if (!fn)
+ goto free_chip;
+
+ hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0, HV_PCI_MSI_SPI_NR,
+ fn, &hv_pci_domain_ops, chip_data);
+
+ if (!hv_msi_gic_irq_domain) {
+ pr_err("Failed to create Hyper-V ARMV vPCI MSI IRQ domain\n");
+ goto free_chip;
+ }
+
+ return 0;
+
+free_chip:
+ kfree(chip_data);
+ if (fn)
+ irq_domain_free_fwnode(fn);
+
+ return ret;
+}
+
+/* This routine performs the cleanup for the IRQ domain. */
+void hv_pci_vector_free(void)
Why isn't this static?
+{
+ static struct hv_pci_chip_data *chip_data;
+
+ if (!hv_msi_gic_irq_domain)
+ return;
+
+ /* Host data cannot be null if the domain was created successfully */
+ chip_data = hv_msi_gic_irq_domain->host_data;
+ irq_domain_remove(hv_msi_gic_irq_domain);
+ hv_msi_gic_irq_domain = NULL;
+ kfree(chip_data);
+}
+
+/* Performs the architecture specific initialization for Hyper-V vPCI. */
+int hv_pci_arch_init(void)
+{
+ return hv_pci_vector_init();
+}
+EXPORT_SYMBOL_GPL(hv_pci_arch_init);
+
+/* Architecture specific cleanup for Hyper-V vPCI. */
+void hv_pci_arch_free(void)
+{
+ hv_pci_vector_free();
+}
+EXPORT_SYMBOL_GPL(hv_pci_arch_free);
+
+struct irq_domain *hv_msi_parent_vector_domain(void)
+{
+ return hv_msi_gic_irq_domain;
+}
+EXPORT_SYMBOL_GPL(hv_msi_parent_vector_domain);
+
+unsigned int hv_msi_get_int_vector(struct irq_data *irqd)
+{
+ irqd = irq_domain_get_irq_data(hv_msi_gic_irq_domain, irqd->irq);
+
+ return irqd->hwirq;
+}
+EXPORT_SYMBOL_GPL(hv_msi_get_int_vector);
I fail to understand why this is all exported instead of being part of the HyperV PCI module.
quoted hunk ↗ jump to hunk
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h index 4d964a7f02ee..bc6c7ac934a1 100644 --- a/arch/arm64/include/asm/hyperv-tlfs.h +++ b/arch/arm64/include/asm/hyperv-tlfs.h@@ -64,6 +64,15 @@ #define HV_REGISTER_STIMER0_CONFIG 0x000B0000 #define HV_REGISTER_STIMER0_COUNT 0x000B0001 +union hv_msi_entry { + u64 as_uint64[2]; + struct { + u64 address; + u32 data; + u32 reserved; + } __packed; +}; + #include <asm-generic/hyperv-tlfs.h> #endifdiff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h index 20070a847304..68bc1617707b 100644 --- a/arch/arm64/include/asm/mshyperv.h +++ b/arch/arm64/include/asm/mshyperv.h@@ -20,6 +20,8 @@ #include <linux/types.h> #include <linux/arm-smccc.h> +#include <linux/interrupt.h> +#include <linux/msi.h> #include <asm/hyperv-tlfs.h> /*@@ -49,6 +51,30 @@ static inline u64 hv_get_register(unsigned int reg) ARM_SMCCC_OWNER_VENDOR_HYP, \ HV_SMCCC_FUNC_NUMBER) +#define hv_msi_handler NULL +#define hv_msi_handler_name NULL +#define hv_msi_irq_delivery_mode 0 +#define hv_msi_prepare NULL + +int hv_pci_arch_init(void); +void hv_pci_arch_free(void); +struct irq_domain *hv_msi_parent_vector_domain(void); +unsigned int hv_msi_get_int_vector(struct irq_data *data); +static inline irq_hw_number_t +hv_msi_domain_ops_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return arg->hwirq; +} + +static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + struct msi_desc *msi_desc) +{ + msi_entry->address = ((u64)msi_desc->msg.address_hi << 32) | + msi_desc->msg.address_lo; + msi_entry->data = msi_desc->msg.data; +}
Why do we need any of this? Why inline? Please explain what you are trying to achieve here. Thanks, M. -- Without deviation from the norm, progress is not possible.