Re: [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart
From: dai.ngo@oracle.com
Date: 2023-02-23 05:40:47
Hi Anna,

Just a reminder that this patch is still waiting for a review.

Thanks,
-Dai

On 2/17/23 10:22 AM, dai.ngo@oracle.com wrote:
Hi Trond,

Could you please let me know your opinion on this patch?

Thanks,
-Dai

On 2/10/23 12:10 AM, Dai Ngo wrote:
Occasionally NLM lock and unlock requests fail with EIO and ENOLCK
respectively. This usually happens when the NFS server is restarted
while an NLM lock test is running.

Currently there is a 9-second limit for retrying the bind operation.
If the server is under load, the port mapper might take more than
9 seconds to become ready after the NFS server is restarted.

This patch increases the timeout for rebind from 9 to 30 seconds,
allowing a bit more time for the port mapper to become ready.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 include/linux/sunrpc/clnt.h  | 3 +++
 include/linux/sunrpc/sched.h | 4 ++--
 net/sunrpc/clnt.c            | 2 +-
 net/sunrpc/sched.c           | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 770ef2cb5775..7f2dee56c121 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
 #define RPC_CLNT_CREATE_REUSEPORT	(1UL << 11)
 #define RPC_CLNT_CREATE_CONNECTED	(1UL << 12)
 
+#define RPC_CLNT_REBIND_DELAY		3
+#define RPC_CLNT_REBIND_MAX_TIMEOUT	30
+
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
 				const struct rpc_program *, u32);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b8ca3ecaf8d7..e9dc142f10bb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -90,8 +90,8 @@ struct rpc_task {
 #endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
-				tk_cred_retry : 2,
-				tk_rebind_retry : 2;
+				tk_cred_retry : 2;
+	unsigned char		tk_rebind_retry;
 };
 
 typedef void			(*rpc_action)(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..6c89a1fa40bf 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
 		if (task->tk_rebind_retry == 0)
 			break;
 		task->tk_rebind_retry--;
-		rpc_delay(task, 3*HZ);
+		rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
 		goto retry_timeout;
 	case -ENOBUFS:
 		rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index be587a308e05..5c18a35752aa 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
 	task->tk_cred_retry = 2;
-	task->tk_rebind_retry = 2;
+	task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
+					RPC_CLNT_REBIND_DELAY;
 
 	/* starting timestamp */
 	task->tk_start = ktime_get();