Re: [RFC PATCH v1 12/12] test/vsock: MSG_ZEROCOPY support for vsock_perf
From: Krasnov Arseniy <hidden>
Date: 2023-02-20 09:05:32
Also in:
kvm, lkml
On 16.02.2023 18:29, Stefano Garzarella wrote:
On Mon, Feb 06, 2023 at 07:06:32AM +0000, Arseniy Krasnov wrote:quoted
To use this option pass '--zc' parameter:--zerocopy or --zero-copy maybe better follow what we did with the other parameters :-)quoted
./vsock_perf --zc --sender <cid> --port <port> --bytes <bytes to send> With this option MSG_ZEROCOPY flag will be passed to the 'send()' call. Signed-off-by: Arseniy Krasnov <redacted> --- tools/testing/vsock/vsock_perf.c | 127 +++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 7 deletions(-)diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index a72520338f84..1d435be9b48e 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c@@ -18,6 +18,8 @@#include <poll.h> #include <sys/socket.h> #include <linux/vm_sockets.h> +#include <sys/mman.h> +#include <linux/errqueue.h> #define DEFAULT_BUF_SIZE_BYTES (128 * 1024) #define DEFAULT_TO_SEND_BYTES (64 * 1024)@@ -28,9 +30,14 @@#define BYTES_PER_GB (1024 * 1024 * 1024ULL) #define NSEC_PER_SEC (1000000000ULL) +#ifndef SOL_VSOCK +#define SOL_VSOCK 287 +#endifI thought we use the current kernel headers when we compile the tests, do we need to fix something in the makefile?
Not sure, of course we are using uapi. But i see, that defines like SOL_XXX is not defined in uapi headers. For example SOL_IP is defined in include/linux/socket.h, but userspace app uses SOL_IP from in.h (at least on my machine). E.g. SOL_XXX is not exported to user.
quoted
+ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static bool zerocopy; static void error(const char *s) {@@ -247,15 +254,74 @@ static void run_receiver(unsigned long rcvlowat_bytes)close(fd); } +static void recv_completion(int fd) +{ + struct sock_extended_err *serr; + char cmsg_data[128]; + struct cmsghdr *cm; + struct msghdr msg; + int ret; + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1) + return; + + cm = CMSG_FIRSTHDR(&msg); + if (!cm) { + fprintf(stderr, "cmsg: no cmsg\n"); + return; + } + + if (cm->cmsg_level != SOL_VSOCK) { + fprintf(stderr, "cmsg: unexpected 'cmsg_level'\n"); + return; + } + + if (cm->cmsg_type) { + fprintf(stderr, "cmsg: unexpected 'cmsg_type'\n"); + return; + } + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + fprintf(stderr, "serr: wrong origin\n"); + return; + } + + if (serr->ee_errno) { + fprintf(stderr, "serr: wrong error code\n"); + return; + } + + if (zerocopy && (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) + fprintf(stderr, "warning: copy instead of zerocopy\n"); +} + +static void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + error("setsockopt(SO_ZEROCOPY)"); +} + static void run_sender(int peer_cid, unsigned long to_send_bytes) { time_t tx_begin_ns; time_t tx_total_ns; size_t total_send; + time_t time_in_send; void *data; int fd; - printf("Run as sender\n"); + if (zerocopy) + printf("Run as sender MSG_ZEROCOPY\n"); + else + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); printf("Send %lu bytes\n", to_send_bytes); printf("TX buffer %lu bytes\n", buf_size_bytes);@@ -265,25 +331,58 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes)if (fd < 0) exit(EXIT_FAILURE); - data = malloc(buf_size_bytes); + if (zerocopy) { + enable_so_zerocopy(fd); - if (!data) { - fprintf(stderr, "'malloc()' failed\n"); - exit(EXIT_FAILURE); + data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (data == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + } else { + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } }Eventually to simplify the code I think we can use the mmaped buffer in both cases.quoted
memset(data, 0, buf_size_bytes); total_send = 0; + time_in_send = 0; tx_begin_ns = current_nsec(); while (total_send < to_send_bytes) { ssize_t sent; + size_t rest_bytes; + time_t before; + + rest_bytes = to_send_bytes - total_send; - sent = write(fd, data, buf_size_bytes); + before = current_nsec(); + sent = send(fd, data, (rest_bytes > buf_size_bytes) ? + buf_size_bytes : rest_bytes, + zerocopy ? MSG_ZEROCOPY : 0); + time_in_send += (current_nsec() - before); if (sent <= 0) error("write"); + if (zerocopy) { + struct pollfd fds = { 0 }; + + fds.fd = fd;Which event are we waiting for here?quoted
+ + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + }We need this because we use only one buffer, but if we use more than one, we could take full advantage of zerocopy, right? Otherwise, I don't think it's a fair comparison with non-zerocopy.
Yes or course, in next versions i'll update this test for using more long iovs.
Thanks, Stefanoquoted
+ + recv_completion(fd); + } + total_send += sent; }@@ -294,9 +393,14 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes)get_gbps(total_send * 8, tx_total_ns)); printf("total time in 'write()': %f sec\n", (float)tx_total_ns / NSEC_PER_SEC); + printf("time in send %f\n", (float)time_in_send / NSEC_PER_SEC); close(fd); - free(data); + + if (zerocopy) + munmap(data, buf_size_bytes); + else + free(data); } static const char optstring[] = "";@@ -336,6 +440,11 @@ static const struct option longopts[] = {.has_arg = required_argument, .val = 'R', }, + { + .name = "zc", + .has_arg = no_argument, + .val = 'Z', + }, {}, };@@ -351,6 +460,7 @@ static void usage(void)" --help This message\n" " --sender <cid> Sender mode (receiver default)\n" " <cid> of the receiver to connect to\n" + " --zc Enable zerocopy\n" " --port <port> Port (default %d)\n" " --bytes <bytes>KMG Bytes to send (default %d)\n" " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n"@@ -413,6 +523,9 @@ int main(int argc, char **argv)case 'H': /* Help. */ usage(); break; + case 'Z': /* Zerocopy. */ + zerocopy = true; + break; default: usage(); } -- 2.25.1