Re: nvme tcp receive errors
From: Sagi Grimberg <sagi@grimberg.me>
Date: 2021-03-31 22:46:09
Subsystem:
nvm express driver, the rest · Maintainers:
Keith Busch, Jens Axboe, Christoph Hellwig, Sagi Grimberg, Linus Torvalds
quoted
What is the workload you are running? have an fio job file? Is this I/O to a raw block device? or with fs or iosched?
It's O_DIRECT to raw block device using libaio engine. No fs, page cache, or io scheduler are used.
I see.
The fio job is generated by a script that cycles through various sizes, rw mixes, and io depth. It is not always consistent on which particular set of parameters is running when the error message is observed, though. I can get more details if this will be helpful.
Try out a debug patch [1], and when this happens we can get some more info on the request itself.
quoted
Also, I'm assuming that you are using Linux nvmet as the target device?
Not this time. The target is implemented in a hardware device.
Ha, cool...

[1]:
---
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 69f59d2c5799..b218a41ac088 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -37,6 +37,14 @@ enum nvme_tcp_send_state {
NVME_TCP_SEND_DDGST,
};

+enum nvme_tcp_cmd_state {
+ NVME_TCP_CMD_QUEUED = 0,
+ NVME_TCP_CMD_SEND_PDU,
+ NVME_TCP_CMD_PENDING_DATA,
+ NVME_TCP_CMD_DATA_DONE,
+ NVME_TCP_CMD_DONE,
+};
+
struct nvme_tcp_request {
struct nvme_request req;
void *pdu;
@@ -56,6 +64,7 @@ struct nvme_tcp_request {
size_t offset;
size_t data_sent;
enum nvme_tcp_send_state state;
+ enum nvme_tcp_cmd_state cmd_state;
};

enum nvme_tcp_queue_flags {
@@ -482,6 +491,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
struct nvme_completion *cqe)
{
+ struct nvme_tcp_request *req;
struct request *rq;
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
@@ -493,6 +503,8 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
return -EINVAL;
}
+ req = blk_mq_rq_to_pdu(rq);
+ req->cmd_state = NVME_TCP_CMD_DONE;
if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
nvme_complete_rq(rq);
queue->nr_cqe++;
@@ -503,6 +515,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
struct nvme_tcp_data_pdu *pdu)
{
+ struct nvme_tcp_request *req;
struct request *rq;
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
@@ -512,11 +525,12 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
nvme_tcp_queue_id(queue), pdu->command_id);
return -ENOENT;
}
+ req = blk_mq_rq_to_pdu(rq);
if (!blk_rq_payload_bytes(rq)) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x unexpected data\n",
- nvme_tcp_queue_id(queue), rq->tag);
+ "queue %d tag %#x unexpected data cmd_state %d\n",
+ nvme_tcp_queue_id(queue), rq->tag, req->cmd_state);
return -EIO;
}
@@ -755,7 +769,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
nvme_tcp_ddgst_final(queue->rcv_hash,
&queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
+ req->cmd_state = NVME_TCP_CMD_DATA_DONE;
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
+ req->cmd_state = NVME_TCP_CMD_DONE;
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
}
@@ -796,7 +812,10 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
struct request *rq =
blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
pdu->command_id);
+ struct nvme_tcp_request *req;
+ req = blk_mq_rq_to_pdu(rq);
+ req->cmd_state = NVME_TCP_CMD_DONE;
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
}
@@ -944,6 +963,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
nvme_tcp_ddgst_final(queue->snd_hash,
&req->ddgst);
req->state = NVME_TCP_SEND_DDGST;
+ req->cmd_state = NVME_TCP_CMD_DATA_DONE;
req->offset = 0;
} else {
nvme_tcp_done_send_req(queue);
@@ -979,6 +999,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
len -= ret;
if (!len) {
+ req->cmd_state = req->data_len ?
NVME_TCP_CMD_PENDING_DATA : NVME_TCP_CMD_DATA_DONE;
if (inline_data) {
req->state = NVME_TCP_SEND_DATA;
if (queue->data_digest)
@@ -2329,6 +2350,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
+ req->cmd_state = NVME_TCP_CMD_QUEUED;
nvme_tcp_queue_request(req, true, bd->last);
return BLK_STS_OK;
---
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme