Re: [PATCH v2 6/7] ibmvfc: register and use asynchronous sub-queue

From: Dave Marquardt <hidden>
Date: 2026-06-25 21:32:18
Also in: linux-scsi, lkml

Tyrel Datwyler [off-list ref] writes:

On 6/8/26 11:30 AM, Dave Marquardt via B4 Relay wrote:

quoted

From: Dave Marquardt <redacted>

Refactor existing code for async events into a common routine,
register a channel and interrupt handler for the asynchronous
sub-queue, and set capability bits to request that VIOS use the
asynchronous sub-queue.

Again, all your patches need Signed-off-by: tags to be accepted.

Also, there is still a lot of different things happening in this patch

You seem to be reworking the fpin_desc helpers which I'm not really seeing any
reason that you couldn't squash those changes into patch 2. It seems like
needless churn since, unless I'm missing something, there doesn't seem to be any
new definitions added since patch 2 just reworking of the input parameters such
that period goes away and is hardcoded and a new type paramerter is added.

Okay, I'll roll the ibmvfc_common_fpin_to_desc changes into patch 2.

I would also help reduce the patch size and make it more reviewable if you split
the the interrupt handler definition and queue registration into separate
patches as well.

quoted

---
 drivers/scsi/ibmvscsi/ibmvfc.c       | 376 +++++++++++++++++++++++++++--------
 drivers/scsi/ibmvscsi/ibmvfc.h       |   3 +
 drivers/scsi/ibmvscsi/ibmvfc_kunit.c |   2 +-
 3 files changed, 298 insertions(+), 83 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ad1f5636e879..a2252cd2f44b 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c

@@ -1514,7 +1514,8 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
 	login_info->max_cmds = cpu_to_be32(max_cmds);
 	login_info->capabilities =
 		cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN |
-			    IBMVFC_CAN_USE_NOOP_CMD);
+			    IBMVFC_CAN_USE_NOOP_CMD | IBMVFC_YES_SCSI |
+			    IBMVFC_USE_ASYNC_SUBQ | IBMVFC_CAN_HANDLE_FPIN);
 
 	if (vhost->mq_enabled || vhost->using_channels)
 		login_info->capabilities |= cpu_to_be64(IBMVFC_CAN_USE_CHANNELS);

@@ -3240,8 +3241,8 @@ static size_t ibmvfc_fpin_size_helper(u8 fpin_status)
  * non-NULL - pointer to populated struct fc_els_fpin
  */
 static struct fc_els_fpin *
-ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
-			   __be32 period, __be32 threshold, __be32 event_count)
+ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 type, __be16 modifier,
+			   __be32 threshold, __be32 event_count)
 {

This function signature changes here, and looking at the implemenation below I'm
struggling to see why this couldn't just be all implmented immediatly in Patch #2.

quoted

 	struct fc_fn_peer_congn_desc *pdesc;
 	struct fc_fn_congn_desc *cdesc;

@@ -3253,7 +3254,7 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
 	if (size == 0)
 		return NULL;
 
-	fpin = kzalloc(size, GFP_KERNEL);
+	fpin = kzalloc(size, GFP_ATOMIC);

Why are we changing this to GFP_ATOMIC? Isn't this only ever called from work
queue context?

I've restored this for my next version of the patch series.

quoted

 	if (fpin == NULL)
 		return NULL;

@@ -3266,12 +3267,9 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
 		cdesc = (struct fc_fn_congn_desc *)fpin->fpin_desc;
 		cdesc->desc_tag = cpu_to_be32(ELS_DTAG_CONGESTION);
 		cdesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*cdesc));
-		if (fpin_status == IBMVFC_AE_FPIN_CONGESTION_CLEARED)
-			cdesc->event_type = cpu_to_be16(FPIN_CONGN_CLEAR);
-		else
-			cdesc->event_type = cpu_to_be16(FPIN_CONGN_DEVICE_SPEC);
+		cdesc->event_type = type;
 		cdesc->event_modifier = modifier;
-		cdesc->event_period = period;
+		cdesc->event_period = cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_PERIOD);
 		cdesc->severity = FPIN_CONGN_SEVERITY_WARNING;
 		break;
 	case IBMVFC_AE_FPIN_PORT_CONGESTED:
@@ -3281,12 +3279,9 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
 		pdesc = (struct fc_fn_peer_congn_desc *)fpin->fpin_desc;
 		pdesc->desc_tag = cpu_to_be32(ELS_DTAG_PEER_CONGEST);
 		pdesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*pdesc));
-		if (fpin_status == IBMVFC_AE_FPIN_PORT_CLEARED)
-			pdesc->event_type = cpu_to_be16(FPIN_CONGN_CLEAR);
-		else
-			pdesc->event_type = cpu_to_be16(FPIN_CONGN_DEVICE_SPEC);
+		pdesc->event_type = type;
 		pdesc->event_modifier = modifier;
-		pdesc->event_period = period;
+		pdesc->event_period = cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_PERIOD);
 		pdesc->detecting_wwpn = cpu_to_be64(0);
 		pdesc->attached_wwpn = wwpn;
 		pdesc->pname_count = cpu_to_be32(1);
@@ -3297,7 +3292,7 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
 		ldesc = (struct fc_fn_li_desc *)fpin->fpin_desc;
 		ldesc->desc_tag = cpu_to_be32(ELS_DTAG_LNK_INTEGRITY);
 		ldesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*ldesc));
-		ldesc->event_type = cpu_to_be16(FPIN_LI_UNKNOWN);
+		ldesc->event_type = type;
 		ldesc->event_modifier = modifier;
 		ldesc->event_threshold = threshold;
 		ldesc->event_count = event_count;
@@ -3331,9 +3326,47 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
 static struct fc_els_fpin *
 ibmvfc_basic_fpin_to_desc(struct ibmvfc_async_crq *crq, u64 wwpn)
 {
+	__be16 type;
+
+	switch (crq->fpin_status) {
+	case IBMVFC_AE_FPIN_LINK_CONGESTED:
+	case IBMVFC_AE_FPIN_PORT_CONGESTED:
+		type = cpu_to_be16(FPIN_CONGN_DEVICE_SPEC);
+		break;
+	case IBMVFC_AE_FPIN_PORT_CLEARED:
+	case IBMVFC_AE_FPIN_CONGESTION_CLEARED:
+		type = cpu_to_be16(FPIN_CONGN_CLEAR);
+		break;
+	case IBMVFC_AE_FPIN_PORT_DEGRADED:
+		type = cpu_to_be16(FPIN_LI_UNKNOWN);
+		break;
+	default:
+		return (NULL);
+	}
+
 	return ibmvfc_common_fpin_to_desc(crq->fpin_status, cpu_to_be64(wwpn),
-					  cpu_to_be16(0),
-					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_PERIOD),
+					  type, cpu_to_be16(0),
+					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_THRESHOLD),
+					  cpu_to_be32(1));
+}
+
+/**
+ * ibmvfc_full_fpin_to_desc(): allocate and populate a struct fc_els_fpin struct
+ * containing a descriptor.
+ * @ibmvfc_fpin: Pointer to async subq FPIN data
+ *
+ * Allocate a struct fc_els_fpin containing a descriptor and populate
+ * based on data from *ibmvfc_fpin.
+ *
+ * Return:
+ * NULL     - unable to allocate structure
+ * non-NULL - pointer to populated struct fc_els_fpin
+ */
+static struct fc_els_fpin *
+ibmvfc_full_fpin_to_desc(struct ibmvfc_async_subq *ibmvfc_fpin)
+{
+	return ibmvfc_common_fpin_to_desc(ibmvfc_fpin->fpin_status, ibmvfc_fpin->wwpn,
+					  cpu_to_be16(0), cpu_to_be16(0),
 					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_THRESHOLD),
 					  cpu_to_be32(1));
 }
@@ -3344,67 +3377,99 @@ ibmvfc_basic_fpin_to_desc(struct ibmvfc_async_crq *crq, u64 wwpn)
  */
 static void ibmvfc_process_async_work(struct work_struct *work)
 {
+	struct ibmvfc_async_subq_fpin *sqfpin;
+	struct ibmvfc_target *tgt, *next;
+	struct ibmvfc_async_subq *subq;
 	struct ibmvfc_async_work *aw;
 	struct ibmvfc_async_crq *crq;
-	struct ibmvfc_target *tgt;
 	struct ibmvfc_host *vhost;
 	struct fc_els_fpin *fpin;
+	__be64 node_name;
+	__be64 scsi_id;
+	__be64 wwpn;
 
 	aw = container_of(work, struct ibmvfc_async_work, async_work_s);
 	crq = aw->crq;
+	subq = aw->subq;
 	vhost = aw->vhost;
 
-	if (!crq->scsi_id && !crq->wwpn && !crq->node_name)
+	if ((!crq && !subq) || (crq && subq)) {
+		dev_err_ratelimited(vhost->dev,
+				    "FPIN event received, unable to process\n");
 		goto end;
-	list_for_each_entry(tgt, &vhost->targets, queue) {
-		if (crq->scsi_id && cpu_to_be64(tgt->scsi_id) != crq->scsi_id)
+	}
+
+	if (crq) {
+		wwpn = crq->wwpn;
+		node_name = crq->node_name;
+		scsi_id = crq->scsi_id;
+	} else {
+		wwpn = subq->wwpn;
+		node_name = subq->id.node_name;
+		scsi_id = 0;
+	}
+
+	if (!scsi_id && !wwpn && !node_name)
+		goto end;
+
+	list_for_each_entry_safe(tgt, next, &vhost->targets, queue) {
+		if (scsi_id && cpu_to_be64(tgt->scsi_id) != scsi_id)
 			continue;
-		if (crq->wwpn && cpu_to_be64(tgt->ids.port_name) != crq->wwpn)
+		if (wwpn && cpu_to_be64(tgt->ids.port_name) != wwpn)
 			continue;
-		if (crq->node_name && cpu_to_be64(tgt->ids.node_name) != crq->node_name)
+		if (node_name && cpu_to_be64(tgt->ids.node_name) != node_name)
 			continue;
 		if (!tgt->rport)
 			continue;
-		fpin = ibmvfc_basic_fpin_to_desc(crq, tgt->wwpn);
+		if (crq) {
+			fpin = ibmvfc_basic_fpin_to_desc(crq, tgt->wwpn);
+		} else {
+			sqfpin = (struct ibmvfc_async_subq_fpin *)subq;
+			fpin = ibmvfc_full_fpin_to_desc(subq);
+		}
 		if (fpin) {
 			fc_host_fpin_rcv(tgt->vhost->host,
 					 sizeof(*fpin) + be32_to_cpu(fpin->desc_len),
 					 (char *)fpin, 0);
 			kfree(fpin);
 		} else
-			dev_err_ratelimited(vhost->dev,
-					    "FPIN event %u received, unable to process\n",
-					    crq->fpin_status);
+			dev_err_ratelimited(vhost->dev, "FPIN event received, unable to process\n");
 	}
 
  end:
-	crq->valid = 0;
+	if (crq)
+		crq->valid = 0;
+	if (subq)
+		subq->valid = 0;
 
 	kfree(aw);
 }
 
 /**
- * ibmvfc_handle_async - Handle an async event from the adapter
- * @crq:	crq to process
+ * ibmvfc_handle_async_common - Handle an async event from the adapter
+ * @event:	event to process
+ * @link_state:	link state
  * @vhost:	ibmvfc host struct
+ * @scsi_id:	scsi_id (0 if not applicable)
+ * @wwpn:	wwpn
+ * @node_name:	node_name
+ * @aw_crq:	crq pointer for async work (NULL if not needed)
+ * @aw_subq:	subq pointer for async work (NULL if not needed)
  *
  **/
-VISIBLE_IF_KUNIT void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
-					  struct ibmvfc_host *vhost)
+static void ibmvfc_handle_async_common(u64 event, u8 link_state,
+				       struct ibmvfc_host *vhost,
+				       u64 scsi_id, u64 wwpn, u64 node_name,
+				       struct ibmvfc_async_crq *aw_crq,
+				       struct ibmvfc_async_subq *aw_subq)

This is a lot of parameters which could be extracted here in the function if it
just knew the type of async_crq. Would it be easier to no just take a void
*async_instance parameter, and an is_subq boolean to determine if you cast the
void * to ibmvfc_async_crq * or ibmvfc_async_sub_crq *?

Yes, good idea. I've changed this to

VISIBLE_IF_KUNIT void ibmvfc_handle_async(void *crq,
					  struct ibmvfc_host *vhost,
					  bool is_subq)

and will extract all of these parameters from crq.

quoted

+/**
+ * ibmvfc_handle_async - Handle an async event from the adapter
+ * @crq:	crq to process
+ * @vhost:	ibmvfc host struct
+ *
+ **/
+VISIBLE_IF_KUNIT void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
+					  struct ibmvfc_host *vhost)

async and asyncq as in the function name below can be hard to notice the
difference in name convention. Maybe async_subq would be a better ender, but as
it might be eaiser to just call ibmvfc_handle_async_common(crq, 0) and
ibmfvc_handle_async(sub_crq, 1), as I outlined above in each of the interrupt
handlers.

I agree. I've refactored to a singled ibmvfc_handle_async() with a
boolean parameter indicating whether the CRQ is a subqueue CRQ or not.

quoted

+{
+	const struct ibmvfc_async_desc *desc = ibmvfc_get_ae_desc(be64_to_cpu(crq->event));
+	u64 event = be64_to_cpu(crq->event);
+
+	ibmvfc_log(vhost, desc->log_level,
+		   "%s event received. scsi_id: %llx, wwpn: %llx, node_name: %llx%s\n",
+		   desc->desc, be64_to_cpu(crq->scsi_id),
+		   be64_to_cpu(crq->wwpn), be64_to_cpu(crq->node_name),
+		   ibmvfc_get_link_state(crq->link_state));
+
+	ibmvfc_handle_async_common(event, crq->link_state, vhost,
+				   crq->scsi_id, crq->wwpn, crq->node_name,
+				   crq, NULL);
 }
 EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_async);
 
+VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
+					   struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_async_subq *crq = (struct ibmvfc_async_subq *)crq_instance;
+	const struct ibmvfc_async_desc *desc = ibmvfc_get_ae_desc(be16_to_cpu(crq->event));
+	u64 event = be16_to_cpu(crq->event);
+
+	ibmvfc_log(vhost, desc->log_level,
+		   "%s event received. wwpn: %llx, node_name: %llx%s event 0x%x\n",
+		   desc->desc, be64_to_cpu(crq->wwpn), be64_to_cpu(crq->id.node_name),
+		   ibmvfc_get_link_state(crq->link_state), be16_to_cpu(crq->event));
+
+	ibmvfc_handle_async_common(event, crq->link_state, vhost,
+				   0, crq->wwpn, crq->id.node_name,
+				   NULL, crq);
+}
+EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_asyncq);
+
 /**
  * ibmvfc_handle_crq - Handles and frees received events in the CRQ
  * @crq:	Command/Response queue

@@ -4117,6 +4229,13 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
 	spin_unlock(&evt->queue->l_lock);
 }
 
+/**
+ * ibmvfc_next_scrq - Returns the next entry in message subqueue
+ * @scrq:	Pointer to message subqueue
+ *
+ * Returns:
+ *	Pointer to next entry in queue / NULL if empty
+ **/
 static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_queue *scrq)
 {
 	struct ibmvfc_crq *crq;
@@ -4132,6 +4251,57 @@ static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_queue *scrq)
 	return crq;
 }
 
+static void ibmvfc_drain_async_subq(struct ibmvfc_queue *scrq)
+{
+	struct ibmvfc_crq *crq;
+	unsigned long flags;
+	int done = 0;
+
+	ENTER;
+
+	spin_lock_irqsave(scrq->q_lock, flags);
+	while (!done) {
+		while ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
+			ibmvfc_handle_asyncq(crq, scrq->vhost);
+			crq->valid = 0;

Isn't up to the handle_async_common to clear the valid crq bit depending on the
type of async action?

I've moved clearing the valid bit to ibmvfc_handle_async().

quoted

+			wmb();	/* complete write */
+		}
+
+		ibmvfc_toggle_scrq_irq(scrq, 1);
+		crq = ibmvfc_next_scrq(scrq);
+		if (crq != NULL) {
+			ibmvfc_toggle_scrq_irq(scrq, 0);
+			ibmvfc_handle_asyncq(crq, scrq->vhost);
+			crq->valid = 0;

Same comment as above.

quoted

+			wmb();	/* complete write */
+		} else
+			done = 1;
+	}
+	spin_unlock_irqrestore(scrq->q_lock, flags);
+
+	LEAVE;
+}
+
+/**
+ * ibmvfc_interrupt_asyncq - Handle an async event from the adapter
+ * @irq:           interrupt request
+ * @scrq_instance: async subq
+ *
+ **/
+static irqreturn_t ibmvfc_interrupt_asyncq(int irq, void *scrq_instance)
+{
+	struct ibmvfc_queue *scrq = (struct ibmvfc_queue *)scrq_instance;
+
+	ENTER;
+
+	ibmvfc_toggle_scrq_irq(scrq, 0);
+	ibmvfc_drain_async_subq(scrq);
+
+	LEAVE;
+
+	return IRQ_HANDLED;
+}
+
 static void ibmvfc_drain_sub_crq(struct ibmvfc_queue *scrq)
 {
 	struct ibmvfc_crq *crq;

@@ -5500,6 +5670,8 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
 	unsigned int npiv_max_sectors;
 	int level = IBMVFC_DEFAULT_LOG_LEVEL;
 
+	ENTER;
+
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
 		ibmvfc_free_event(evt);
@@ -5578,6 +5750,8 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
 		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
 		wake_up(&vhost->work_wait_q);
 	}
+
+	LEAVE;

Also, please drop the ENTER/LEAVE macros as these look like debug artifacts.
Especially, in common code paths like the interrupt handlers.

Done.

-Dave

`h`	back out one level
`j`	next message in thread
`k`	previous message in thread
`l`	drill in
`Esc`	close help / fold thread tree
`?`	toggle this help