RE: [EXTERNAL] Re: [PATCH 2/2] PCI: hv: Support for Hyper-V vPCI for ARM64

From: Sunil Muthuswamy <hidden>
Date: 2021-10-07 23:41:28
Also in: linux-hyperv, linux-pci

On Mon, September 13, 2021 12:03 PM
Marc Zyngier [off-list ref] wrote:

quoted

--- /dev/null
+++ b/arch/arm64/hyperv/hv_pci.c

Nit: this is definitely the wrong location. There isn't anything arm64
specific here that warrants hiding it away. Like most other bizarre
MSI implementations, it should either live in drivers/pci or in
drivers/irqchip.

Thanks. I am moving all of this to drivers/pci/controller in v2.

quoted

@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Architecture specific vector management for the Hyper-V vPCI.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : Sunil Muthuswamy <sunilmut@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as

published

quoted

+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD

TITLE or

quoted

+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.

What is the point of this if you have the SPDX tag?

Will be fixed in V2

quoted

+/*
+ * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit
+ * of room at the start to allow for SPIs to be specified through ACPI.
+ */
+#define HV_PCI_MSI_SPI_START	50

If that's the start, it has a good chance of being the wrong
start. Given that the HyperV PCI controller advertises Multi-MSI
support, INTID 50 cannot be used for any device that requires more
than 2 vectors.

Moved to a power of 2, in v2. More comments below.

quoted

+#define HV_PCI_MSI_SPI_NR	(1020 - HV_PCI_MSI_SPI_START)
+
+struct hv_pci_chip_data {
+	spinlock_t lock;

Why a spinlock? Either this can be used in interrupt context, and we
require a raw_spinlock_t instead, or it never is used in interrupt
context and should be a good old mutex.

Good call. Upon reviewing the requirements again, I believe we can get
away with just a mutex.

quoted

+	DECLARE_BITMAP(bm, HV_PCI_MSI_SPI_NR);
+};
+
+/* Hyper-V vPCI MSI GIC IRQ domain */
+static struct irq_domain *hv_msi_gic_irq_domain;
+
+static struct irq_chip hv_msi_irq_chip = {
+	.name = "Hyper-V ARM64 PCI MSI",

That's a mouthful! How about "MSI" instead?

Will be addressed in V2.

quoted

+	.irq_set_affinity = irq_chip_set_affinity_parent,
+	.irq_eoi = irq_chip_eoi_parent,
+	.irq_mask = irq_chip_mask_parent,
+	.irq_unmask = irq_chip_unmask_parent
+};
+
+/**
+ * Frees the specified number of interrupts.
+ * @domain: The IRQ domain
+ * @virq: The virtual IRQ number.
+ * @nr_irqs: Number of IRQ's to free.
+ */
+static void hv_pci_vec_irq_domain_free(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs)
+{
+	struct hv_pci_chip_data *chip_data = domain->host_data;
+	unsigned long flags;
+	unsigned int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *irqd = irq_domain_get_irq_data(domain,
+								virq + i);
+
+		spin_lock_irqsave(&chip_data->lock, flags);
+		clear_bit(irqd->hwirq - HV_PCI_MSI_SPI_START, chip_data-
bm);
+		spin_unlock_irqrestore(&chip_data->lock, flags);

Really? Why should you disable interrupts here? Why do you need to
lock/unlock on each iteration of this loop?

Good call. In v2, I am moving to using a bitmap region to satisfy Multi-MSI
requirements and that should also take care of this.

quoted

+		irq_domain_reset_irq_data(irqd);
+	}
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+/**
+ * Allocate an interrupt from the domain.
+ * @hwirq: Will be set to the allocated H/W IRQ.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_alloc_device_irq(struct irq_domain *domain,
+				unsigned int virq, irq_hw_number_t *hwirq)
+{
+	struct hv_pci_chip_data *chip_data = domain->host_data;
+	unsigned long flags;
+	unsigned int index;
+
+	spin_lock_irqsave(&chip_data->lock, flags);
+	index = find_first_zero_bit(chip_data->bm, HV_PCI_MSI_SPI_NR);
+	if (index == HV_PCI_MSI_SPI_NR) {
+		spin_unlock_irqrestore(&chip_data->lock, flags);
+		pr_err("No more free IRQ vector available\n");

No, we don't shout because we're out of MSIs. It happens, and drivers
can nicely use fewer vectors if needed.

But more importantly, this totally breaks MultiMSI, see below.

'pr_err' is removed in v2; more comments on Multi-MSI below.

quoted

+		return -ENOSPC;
+	}
+
+	set_bit(index, chip_data->bm);
+	spin_unlock_irqrestore(&chip_data->lock, flags);
+	*hwirq = index + HV_PCI_MSI_SPI_START;
+
+	return 0;
+}
+
+/**
+ * Allocate an interrupt from the parent GIC domain.
+ * @domain: The IRQ domain.
+ * @virq: The virtual IRQ number.
+ * @hwirq: The H/W IRQ number that needs to be allocated.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain,
+					   unsigned int virq,
+					   irq_hw_number_t hwirq)
+{
+	struct irq_fwspec fwspec;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 2;
+	fwspec.param[0] = hwirq;
+	fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+}
+
+/**
+ * Allocate specified number of interrupts from the domain.
+ * @domain: The IRQ domain.
+ * @virq: The starting virtual IRQ number.
+ * @nr_irqs: Number of IRQ's to allocate.
+ * @args: The MSI alloc information.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq, unsigned int nr_irqs,
+				       void *args)
+{
+	irq_hw_number_t hwirq;
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = hv_pci_vec_alloc_device_irq(domain, virq, &hwirq);
+		if (ret)
+			goto free_irq;
+
+		ret = hv_pci_vec_irq_gic_domain_alloc(domain, virq + i,

hwirq);

Please read the specification for PCI MultiMSI. You offer none of the
alignment and contiguity guarantees that are required.

Good call on Multi-MSI and thank you! I am looking to address this in
v2. But, the 'MSI_FLAG_MULTI_PCI_MSI' flag that we set today in 
Hyper-V vPCI, even for x64 seems wrong and broken. We only allocate
one vector at a time from the Hypervisor. That's not going to work with
Multi-MSI. See 'vector_count' in 'hv_compose_msi_req_v2'.
Nevertheless, I do agree with you that if we are implementing something
new, we should be able to at least keep that clean. The Hyper-V vPCI
bug can be addressed separately.

quoted

+		if (ret)
+			goto free_irq;
+
+		ret = irq_domain_set_hwirq_and_chip(domain, virq + i,
+				hwirq, &hv_msi_irq_chip,
+				domain->host_data);
+		if (ret)
+			goto free_irq;
+
+

	irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq +
i)));

Why? The GIC is responsible for the distribution, not the MSI layer.
This looks completely bogus.

Thanks. Will be removed in v2.

quoted

+		pr_debug("pID:%d vID:%u\n", (int)hwirq, virq + i);
+	}
+
+	return 0;
+
+free_irq:
+	if (i > 0)
+		hv_pci_vec_irq_domain_free(domain, virq, i - 1);
+
+	return ret;
+}
+
+/**
+ * Activate the interrupt.
+ * @domain: The IRQ domain.
+ * @irqd: IRQ data.
+ * @reserve: Indicates whether the IRQ's can be reserved.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+static int hv_pci_vec_irq_domain_activate(struct irq_domain *domain,
+					  struct irq_data *irqd, bool reserve)
+{
+	/* All available online CPUs are available for targeting */
+	irq_data_update_effective_affinity(irqd, cpu_online_mask);

Which completely contradicts what you have written above, and doesn't
match what the GIC does either.

We will still need to support this because when Hyper-V vPCI composes the MSI
message ('hv_compose_msi_req_get_cpu'), it will pick the first available CPU
from the online CPU mask.

quoted

+	return 0;
+}
+
+static const struct irq_domain_ops hv_pci_domain_ops = {
+	.alloc	= hv_pci_vec_irq_domain_alloc,
+	.free	= hv_pci_vec_irq_domain_free,
+	.activate = hv_pci_vec_irq_domain_activate,
+};
+
+
+/**
+ * This routine performs the architecture specific initialization for vector
+ * domain to operate. It allocates an IRQ domain tree as a child of the GIC
+ * IRQ domain.
+ *
+ * Return: 0 on success and error value on failure.
+ */
+int hv_pci_vector_init(void)

Why isn't this static?

Thanks. This is getting rearranged in v2.

quoted

+{
+	static struct hv_pci_chip_data *chip_data;
+	struct fwnode_handle *fn = NULL;
+	int ret = -ENOMEM;
+
+	chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
+	if (!chip_data)
+		return ret;
+
+	spin_lock_init(&chip_data->lock);
+	fn = irq_domain_alloc_named_fwnode("Hyper-V ARM64 vPCI");
+	if (!fn)
+		goto free_chip;
+
+	hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0,

HV_PCI_MSI_SPI_NR,

quoted

+					fn, &hv_pci_domain_ops, chip_data);
+
+	if (!hv_msi_gic_irq_domain) {
+		pr_err("Failed to create Hyper-V ARMV vPCI MSI IRQ

domain\n");

quoted

+		goto free_chip;
+	}
+
+	return 0;
+
+free_chip:
+	kfree(chip_data);
+	if (fn)
+		irq_domain_free_fwnode(fn);
+
+	return ret;
+}
+
+/* This routine performs the cleanup for the IRQ domain. */
+void hv_pci_vector_free(void)

Why isn't this static?

Thanks. This is getting rearranged in v2.

quoted

+{
+	static struct hv_pci_chip_data *chip_data;
+
+	if (!hv_msi_gic_irq_domain)
+		return;
+
+	/* Host data cannot be null if the domain was created successfully */
+	chip_data = hv_msi_gic_irq_domain->host_data;
+	irq_domain_remove(hv_msi_gic_irq_domain);
+	hv_msi_gic_irq_domain = NULL;
+	kfree(chip_data);
+}
+
+/* Performs the architecture specific initialization for Hyper-V vPCI. */
+int hv_pci_arch_init(void)
+{
+	return hv_pci_vector_init();
+}
+EXPORT_SYMBOL_GPL(hv_pci_arch_init);
+
+/* Architecture specific cleanup for Hyper-V vPCI. */
+void hv_pci_arch_free(void)
+{
+	hv_pci_vector_free();
+}
+EXPORT_SYMBOL_GPL(hv_pci_arch_free);
+
+struct irq_domain *hv_msi_parent_vector_domain(void)
+{
+	return hv_msi_gic_irq_domain;
+}
+EXPORT_SYMBOL_GPL(hv_msi_parent_vector_domain);
+
+unsigned int hv_msi_get_int_vector(struct irq_data *irqd)
+{
+	irqd = irq_domain_get_irq_data(hv_msi_gic_irq_domain, irqd->irq);
+
+	return irqd->hwirq;
+}
+EXPORT_SYMBOL_GPL(hv_msi_get_int_vector);

I fail to understand why this is all exported instead of being part of
the HyperV PCI module.

Thanks. Yes, this will all become part of the Hyper-V vPCI module in v2
with the code rearrangement.

quoted

diff --git a/arch/arm64/include/asm/hyperv-tlfs.h

b/arch/arm64/include/asm/hyperv-tlfs.h

quoted

index 4d964a7f02ee..bc6c7ac934a1 100644

--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ b/arch/arm64/include/asm/hyperv-tlfs.h

@@ -64,6 +64,15 @@
 #define HV_REGISTER_STIMER0_CONFIG	0x000B0000
 #define HV_REGISTER_STIMER0_COUNT	0x000B0001

+union hv_msi_entry {
+	u64 as_uint64[2];
+	struct {
+		u64 address;
+		u32 data;
+		u32 reserved;
+	} __packed;
+};
+
 #include <asm-generic/hyperv-tlfs.h>

 #endif

diff --git a/arch/arm64/include/asm/mshyperv.h

b/arch/arm64/include/asm/mshyperv.h

quoted

index 20070a847304..68bc1617707b 100644

--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h

@@ -20,6 +20,8 @@

 #include <linux/types.h>
 #include <linux/arm-smccc.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
 #include <asm/hyperv-tlfs.h>

 /*

@@ -49,6 +51,30 @@ static inline u64 hv_get_register(unsigned int reg)
 				ARM_SMCCC_OWNER_VENDOR_HYP,	\
 				HV_SMCCC_FUNC_NUMBER)

+#define hv_msi_handler			NULL
+#define hv_msi_handler_name		NULL
+#define hv_msi_irq_delivery_mode	0
+#define hv_msi_prepare NULL
+
+int hv_pci_arch_init(void);
+void hv_pci_arch_free(void);
+struct irq_domain *hv_msi_parent_vector_domain(void);
+unsigned int hv_msi_get_int_vector(struct irq_data *data);
+static inline irq_hw_number_t
+hv_msi_domain_ops_get_hwirq(struct msi_domain_info *info,
+			    msi_alloc_info_t *arg)
+{
+	return arg->hwirq;
+}
+
+static inline void hv_set_msi_entry_from_desc(union hv_msi_entry

*msi_entry,

quoted

+					      struct msi_desc *msi_desc)
+{
+	msi_entry->address = ((u64)msi_desc->msg.address_hi << 32) |
+			      msi_desc->msg.address_lo;
+	msi_entry->data = msi_desc->msg.data;
+}

Why do we need any of this? Why inline? Please explain what you are
trying to achieve here.

This is because the 'hv_msi_entry' structure is defined differently by
Hyper-V for x64 and arm64 (x64 doesn't have the high part of the address),
so this is just to handle that difference.

Appreciate all of your inputs. v2 is coming up.

- Sunil

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help