Flamegraph analysis showed that the cork/uncork calls consume
nearly a third of the CPU time spent in svc_tcp_sendto(). The
other two consumers are mutex lock/unlock and svc_tcp_sendmsg().
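
For context, tcp_sock_set_cork() is the in-kernel equivalent of the TCP_CORK socket option: it holds back partially filled segments until the cork is released (or an internal timer expires). The fragment below is only a user-space analogue of the bracketing pattern measured above, not kernel code, and the helper name send_record_corked() is invented for illustration:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Illustrative user-space analogue of the cork/uncork bracketing around
 * each RPC reply: every record costs two extra socket-option calls on
 * top of the actual sends. */
static ssize_t send_record_corked(int fd, const void *marker, size_t mlen,
				  const void *body, size_t blen)
{
	int on = 1, off = 0;
	ssize_t total = 0, n;

	/* Cork: hold back partial segments until the record is queued. */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));

	n = send(fd, marker, mlen, 0);
	if (n > 0)
		total += n;
	n = send(fd, body, blen, 0);
	if (n > 0)
		total += n;

	/* Uncork: let the stack push out whatever is still buffered. */
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
	return total;
}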

Now that svc_tcp_sendto() coalesces RPC messages properly, there
is no need to introduce artificial delays to prevent sending
partial messages.
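
To illustrate what "coalesces" means here, the sketch below (an approximation, not the actual svc_tcp_sendmsg() code; the function name and msghdr setup are assumptions) describes the record marker and the reply's xdr_buf with a single bio_vec array and hands the complete RPC record to the socket in one sendmsg call, so TCP never sits on a partial record that corking would have to paper over:

#include <linux/bvec.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/sunrpc/xdr.h>
#include <linux/uio.h>

/* Sketch only: marker + head + pages + tail become one bio_vec array,
 * submitted as a single sendmsg. @bvec must have room for the marker,
 * the head, every page, and the tail. */
static int example_send_whole_record(struct socket *sock, struct bio_vec *bvec,
				     __be32 *marker, struct xdr_buf *xdr)
{
	struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES };
	unsigned int i = 0, pg, offset, remaining;

	bvec_set_virt(&bvec[i++], marker, sizeof(*marker));
	if (xdr->head[0].iov_len)
		bvec_set_virt(&bvec[i++], xdr->head[0].iov_base,
			      xdr->head[0].iov_len);

	/* Page data may start at an offset into the first page. */
	remaining = xdr->page_len;
	offset = offset_in_page(xdr->page_base);
	pg = xdr->page_base >> PAGE_SHIFT;
	while (remaining) {
		unsigned int len = min_t(unsigned int, remaining,
					 PAGE_SIZE - offset);

		bvec_set_page(&bvec[i++], xdr->pages[pg++], len, offset);
		remaining -= len;
		offset = 0;
	}

	if (xdr->tail[0].iov_len)
		bvec_set_virt(&bvec[i++], xdr->tail[0].iov_base,
			      xdr->tail[0].iov_len);

	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, i,
		      sizeof(*marker) + xdr->len);
	return sock_sendmsg(sock, &msg);
}

Because each reply reaches the socket as one contiguous record, the cork/uncork bracketing and the send-queue counter that paced it no longer serve a purpose.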

After applying this change, I measured a 1.2K read IOPS increase
for 8KB random I/O (several percent) on 56Gb IP over IB.

Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

Chuck Lever, 2023-07-19 14:31:22 -04:00
commit 89d2d9fbea, parent baabf59c24
2 changed files with 0 additions and 8 deletions


@@ -35,8 +35,6 @@ struct svc_sock {
 	/* Total length of the data (not including fragment headers)
 	 * received so far in the fragments making up this rpc: */
 	u32			sk_datalen;
-	/* Number of queued send requests */
-	atomic_t		sk_sendqlen;
 
 	struct page_frag_cache	sk_frag_cache;
 


@@ -1268,22 +1268,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 	svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
 	rqstp->rq_xprt_ctxt = NULL;
 
-	atomic_inc(&svsk->sk_sendqlen);
 	mutex_lock(&xprt->xpt_mutex);
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
-	tcp_sock_set_cork(svsk->sk_sk, true);
 	err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent);
 	trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
 	if (err < 0 || sent != (xdr->len + sizeof(marker)))
 		goto out_close;
-	if (atomic_dec_and_test(&svsk->sk_sendqlen))
-		tcp_sock_set_cork(svsk->sk_sk, false);
 	mutex_unlock(&xprt->xpt_mutex);
 	return sent;
 
 out_notconn:
-	atomic_dec(&svsk->sk_sendqlen);
 	mutex_unlock(&xprt->xpt_mutex);
 	return -ENOTCONN;
 out_close:
@@ -1292,7 +1287,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
 		   (err < 0) ? "got error" : "sent",
 		   (err < 0) ? err : sent, xdr->len);
 	svc_xprt_deferred_close(xprt);
-	atomic_dec(&svsk->sk_sendqlen);
 	mutex_unlock(&xprt->xpt_mutex);
 	return -EAGAIN;
 }