Make more Windows socket fixes and improvements

This change makes send() / sendto() always block on Windows. It's needed
because poll(POLLOUT) doesn't guarantee a socket is immediately writable
on Windows; rsync relied on that assumption and failed as a result. The
only exception is when SO_SNDTIMEO is set, in which case the call can
fail with EAGAIN.

Tests are added confirming MSG_WAITALL and MSG_NOSIGNAL work as expected
on all our supported OSes. Most of the platform-specific MSG_FOO magnums
(magic numbers) have been deleted, with the exception of MSG_FASTOPEN.
Your --strace log will now show MSG_FOO flags as symbols rather than
numbers.

I've also removed cv_wait_example_test because it's 0.3% flaky with Qemu
under system load since it depends on a process being readily scheduled.
This commit is contained in:
Justine Tunney 2024-09-18 19:54:56 -07:00
parent ce2fbf9325
commit 87a6669900
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
41 changed files with 584 additions and 184 deletions

View file

@ -65,7 +65,7 @@ int sys_select_nt(int, fd_set *, fd_set *, fd_set *, struct timeval *,
size_t __iovec2nt(struct NtIovec[hasatleast 16], const struct iovec *, size_t);
ssize_t __winsock_block(int64_t, uint32_t, int, uint32_t, uint64_t,
ssize_t __winsock_block(int64_t, uint32_t, bool, uint32_t, uint64_t,
int (*)(int64_t, struct NtOverlapped *, uint32_t *,
void *),
void *);

View file

@ -17,15 +17,17 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/internal.h"
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/intrin/fds.h"
#include "libc/nt/struct/iovec.h"
#include "libc/nt/struct/overlapped.h"
#include "libc/nt/thunk/msabi.h"
#include "libc/nt/winsock.h"
#include "libc/sock/internal.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/consts/fio.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/errfuns.h"
#include "libc/vga/vga.internal.h"
#ifdef __x86_64__
#define _MSG_OOB 1
@ -33,6 +35,8 @@
#define _MSG_WAITALL 8
#define _MSG_DONTWAIT 64
__msabi extern typeof(__sys_ioctlsocket_nt) *const __imp_ioctlsocket;
struct RecvArgs {
const struct iovec *iov;
size_t iovlen;
@ -54,13 +58,24 @@ textwindows ssize_t sys_recv_nt(int fd, const struct iovec *iov, size_t iovlen,
return einval();
ssize_t rc;
struct Fd *f = g_fds.p + fd;
sigset_t m = __sig_block();
bool nonblock = !(flags & _MSG_WAITALL) &&
((f->flags & O_NONBLOCK) || (flags & _MSG_DONTWAIT));
flags &= ~_MSG_DONTWAIT;
rc = __winsock_block(f->handle, flags, nonblock, f->rcvtimeo, m,
sys_recv_nt_start, &(struct RecvArgs){iov, iovlen});
__sig_unblock(m);
sigset_t waitmask = __sig_block();
// "Be aware that if the underlying transport provider does not
// support MSG_WAITALL, or if the socket is in a non-blocking mode,
// then this call will fail with WSAEOPNOTSUPP. Also, if MSG_WAITALL
// is specified along with MSG_OOB, MSG_PEEK, or MSG_PARTIAL, then
// this call will fail with WSAEOPNOTSUPP."
// —Quoth MSDN § WSARecv
if (flags & _MSG_WAITALL)
__imp_ioctlsocket(f->handle, FIONBIO, (uint32_t[]){0});
rc = __winsock_block(f->handle, flags & ~_MSG_DONTWAIT,
(f->flags & O_NONBLOCK) || (flags & _MSG_DONTWAIT),
f->rcvtimeo, waitmask, sys_recv_nt_start,
&(struct RecvArgs){iov, iovlen});
__sig_unblock(waitmask);
return rc;
}

View file

@ -20,20 +20,40 @@
#include "libc/calls/internal.h"
#include "libc/calls/struct/iovec.internal.h"
#include "libc/dce.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/sock/internal.h"
#include "libc/sock/sock.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/consts/msg.h"
#include "libc/sysv/errfuns.h"
/**
* Receives data from network socket.
*
* Calling `recv(fd, p, n, 0)` is equivalent to `read(fd, p, n)`.
*
* Unlike files where the OS tries very hard to fulfill the entire
* requested `size` before returning, read operations on sockets aim to
* return as quickly as possible. For example, if 10 bytes are requested
* and a packet comes in with only 5 bytes, then recv() will most likely
* return those 5 bytes before waiting longer. The `MSG_WAITALL` flag
* may be passed when waiting longer is desired. In that case, short
* reads should only be possible when the connection status changes or
* the receive operation is interrupted by a signal.
*
* @param fd is the file descriptor returned by socket()
* @param buf is where received network data gets copied
* @param size is the byte capacity of buf
* @param flags can have `MSG_OOB`, `MSG_PEEK`, `MSG_DONTWAIT`, `MSG_WAITALL`
* @return number of bytes received, 0 on remote close, or -1 w/ errno
* @raise EINTR if signal handler was called instead
* @raise EINVAL if unknown bits were passed in `flags`
* @raise EINVAL if flag isn't supported by host operating system
* @raise EINVAL if `MSG_WAITALL` and `MSG_PEEK` were both passed
* @raise EBADF if `fd` is an invalid file descriptor
* @raise EAGAIN if `MSG_DONTWAIT` was passed and no data was available
* @raise EAGAIN if `O_NONBLOCK` is in play and no data was available
* @error EINTR, EHOSTUNREACH, ECONNRESET (UDP ICMP Port Unreachable),
* EPIPE (if MSG_NOSIGNAL), EMSGSIZE, ENOTSOCK, EFAULT, etc.
* @cancelationpoint
@ -44,7 +64,11 @@ ssize_t recv(int fd, void *buf, size_t size, int flags) {
ssize_t rc;
BEGIN_CANCELATION_POINT;
if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
if ((flags & (MSG_WAITALL | MSG_PEEK)) == (MSG_WAITALL | MSG_PEEK)) {
// this is possible on some OSes like Linux but it breaks FreeBSD
// and Windows will raise EOPNOTSUPP when it gets passed together
return einval();
} else if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
rc = enotsock();
} else if (!IsWindows()) {
rc = sys_recvfrom(fd, buf, size, flags, 0, 0);
@ -65,7 +89,8 @@ ssize_t recv(int fd, void *buf, size_t size, int flags) {
}
END_CANCELATION_POINT;
DATATRACE("recv(%d, [%#.*hhs%s], %'zu, %#x) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size, flags, rc);
DATATRACE("recv(%d, [%#.*hhs%s], %'zu, %s) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size,
DescribeMsg(flags), rc);
return rc;
}

View file

@ -59,14 +59,13 @@ textwindows ssize_t sys_recvfrom_nt(int fd, const struct iovec *iov,
return einval();
ssize_t rc;
struct Fd *f = g_fds.p + fd;
sigset_t m = __sig_block();
bool nonblock = (f->flags & O_NONBLOCK) || (flags & _MSG_DONTWAIT);
flags &= ~_MSG_DONTWAIT;
rc = __winsock_block(f->handle, flags, nonblock, f->rcvtimeo, m,
sys_recvfrom_nt_start,
sigset_t waitmask = __sig_block();
rc = __winsock_block(f->handle, flags & ~_MSG_DONTWAIT,
(f->flags & O_NONBLOCK) || (flags & _MSG_DONTWAIT),
f->rcvtimeo, waitmask, sys_recvfrom_nt_start,
&(struct RecvFromArgs){iov, iovlen, opt_out_srcaddr,
opt_inout_srcaddrsize});
__sig_unblock(m);
__sig_unblock(waitmask);
return rc;
}

View file

@ -21,6 +21,7 @@
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/iovec.internal.h"
#include "libc/dce.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/nt/winsock.h"
#include "libc/sock/internal.h"
@ -95,7 +96,11 @@ ssize_t recvfrom(int fd, void *buf, size_t size, int flags,
}
END_CANCELATION_POINT;
DATATRACE("recvfrom(%d, [%#.*hhs%s], %'zu, %#x) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size, flags, rc);
DATATRACE(
"recvfrom(%d, [%#.*hhs%s], %'zu, %s, %s) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size, DescribeMsg(flags),
DescribeSockaddr(opt_out_srcaddr,
opt_inout_srcaddrsize ? *opt_inout_srcaddrsize : 0),
rc);
return rc;
}

View file

@ -17,20 +17,25 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/internal.h"
#include "libc/calls/struct/iovec.h"
#include "libc/calls/sig.internal.h"
#include "libc/calls/struct/iovec.internal.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/intrin/fds.h"
#include "libc/errno.h"
#include "libc/nt/errors.h"
#include "libc/nt/struct/iovec.h"
#include "libc/nt/struct/overlapped.h"
#include "libc/nt/winsock.h"
#include "libc/sock/internal.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#include "libc/vga/vga.internal.h"
#ifdef __x86_64__
#define _MSG_OOB 1
#define _MSG_DONTROUTE 4
#define _MSG_DONTWAIT 64
#define _MSG_NOSIGNAL 0x10000000
struct SendArgs {
const struct iovec *iov;
@ -49,23 +54,24 @@ textwindows static int sys_send_nt_start(int64_t handle,
textwindows ssize_t sys_send_nt(int fd, const struct iovec *iov, size_t iovlen,
uint32_t flags) {
if (flags & ~(_MSG_DONTWAIT | _MSG_OOB | _MSG_DONTROUTE))
if (flags & ~(_MSG_DONTWAIT | _MSG_OOB | _MSG_DONTROUTE | _MSG_NOSIGNAL))
return einval();
ssize_t rc;
struct Fd *f = g_fds.p + fd;
sigset_t m = __sig_block();
sigset_t waitmask = __sig_block();
// we don't check O_NONBLOCK because we want to avoid needing to call
// WSAPoll() every time we write() to a non-blocking socket. WIN32 is
// unsafe at canceling socket sends. lots of code doesn't check write
// return status. good programs that sincerely want to avoid blocking
// on send() operations should have already called poll() beforehand.
bool nonblock = flags & _MSG_DONTWAIT;
rc = __winsock_block(f->handle, flags & ~(_MSG_DONTWAIT | _MSG_NOSIGNAL),
false, f->sndtimeo, waitmask, sys_send_nt_start,
&(struct SendArgs){iov, iovlen});
__sig_unblock(waitmask);
if (rc == -1 && errno == WSAESHUTDOWN) { // ESHUTDOWN
errno = kNtErrorBrokenPipe; // EPIPE
if (!(flags & _MSG_NOSIGNAL))
__sig_raise(SIGPIPE, SI_KERNEL);
}
flags &= ~_MSG_DONTWAIT;
rc = __winsock_block(f->handle, flags, -nonblock, f->sndtimeo, m,
sys_send_nt_start, &(struct SendArgs){iov, iovlen});
__sig_unblock(m);
return rc;
}

View file

@ -21,6 +21,7 @@
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/iovec.internal.h"
#include "libc/dce.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/macros.h"
#include "libc/sock/internal.h"
@ -78,7 +79,7 @@ ssize_t send(int fd, const void *buf, size_t size, int flags) {
}
END_CANCELATION_POINT;
DATATRACE("send(%d, %#.*hhs%s, %'zu, %#x) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size, flags, rc);
DATATRACE("send(%d, %#.*hhs%s, %'zu, %s) → %'ld% lm", fd, MAX(0, MIN(40, rc)),
buf, rc > 40 ? "..." : "", size, DescribeMsg(flags), rc);
return rc;
}

View file

@ -17,20 +17,26 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/internal.h"
#include "libc/calls/sig.internal.h"
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/errno.h"
#include "libc/intrin/fds.h"
#include "libc/nt/errors.h"
#include "libc/nt/struct/iovec.h"
#include "libc/nt/winsock.h"
#include "libc/sock/internal.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#ifdef __x86_64__
#define _MSG_OOB 1
#define _MSG_DONTROUTE 4
#define _MSG_DONTWAIT 64
#define _MSG_NOSIGNAL 0x10000000
struct SendToArgs {
const struct iovec *iov;
@ -52,17 +58,25 @@ static textwindows int sys_sendto_nt_start(int64_t handle,
textwindows ssize_t sys_sendto_nt(int fd, const struct iovec *iov,
size_t iovlen, uint32_t flags,
void *opt_in_addr, uint32_t in_addrsize) {
if (flags & ~(_MSG_DONTWAIT | _MSG_OOB | _MSG_DONTROUTE))
if (flags & ~(_MSG_DONTWAIT | _MSG_OOB | _MSG_DONTROUTE | _MSG_NOSIGNAL))
return einval();
ssize_t rc;
struct Fd *f = g_fds.p + fd;
sigset_t m = __sig_block();
bool nonblock = (f->flags & O_NONBLOCK) || (flags & _MSG_DONTWAIT);
flags &= ~_MSG_DONTWAIT;
rc = __winsock_block(
f->handle, flags, -nonblock, f->sndtimeo, m, sys_sendto_nt_start,
&(struct SendToArgs){iov, iovlen, opt_in_addr, in_addrsize});
__sig_unblock(m);
sigset_t waitmask = __sig_block();
rc = __winsock_block(f->handle, flags & ~(_MSG_DONTWAIT | _MSG_NOSIGNAL),
false, f->sndtimeo, waitmask, sys_sendto_nt_start,
&(struct SendToArgs){iov, iovlen, //
opt_in_addr, in_addrsize});
__sig_unblock(waitmask);
if (rc == -1 && errno == WSAESHUTDOWN) { // ESHUTDOWN
errno = kNtErrorBrokenPipe; // EPIPE
if (!(flags & _MSG_NOSIGNAL))
__sig_raise(SIGPIPE, SI_KERNEL);
}
return rc;
}

View file

@ -22,6 +22,7 @@
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/iovec.internal.h"
#include "libc/dce.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/macros.h"
#include "libc/sock/internal.h"
@ -88,8 +89,8 @@ ssize_t sendto(int fd, const void *buf, size_t size, int flags,
}
END_CANCELATION_POINT;
DATATRACE("sendto(%d, %#.*hhs%s, %'zu, %#x, %p, %u) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size, flags,
opt_addr, addrsize, rc);
DATATRACE("sendto(%d, %#.*hhs%s, %'zu, %s, %s) → %'ld% lm", fd,
MAX(0, MIN(40, rc)), buf, rc > 40 ? "..." : "", size,
DescribeMsg(flags), DescribeSockaddr(opt_addr, addrsize), rc);
return rc;
}

View file

@ -42,7 +42,7 @@
#ifdef __x86_64__
textwindows ssize_t
__winsock_block(int64_t handle, uint32_t flags, int nonblock,
__winsock_block(int64_t handle, uint32_t flags, bool nonblock,
uint32_t srwtimeout, sigset_t waitmask,
int StartSocketOp(int64_t handle, struct NtOverlapped *overlap,
uint32_t *flags, void *arg),
@ -63,21 +63,6 @@ __winsock_block(int64_t handle, uint32_t flags, int nonblock,
bool got_eagain = false;
uint32_t other_error = 0;
// send() and sendto() provide O_NONBLOCK as a negative number
// because winsock has a bug that causes CancelIoEx() to cause
// WSAGetOverlappedResult() to report errors when it succeeded
if (nonblock < 0) {
struct sys_pollfd_nt fds[1] = {{handle, POLLOUT}};
switch (WSAPoll(fds, 1, 0)) {
case -1:
return __winsockerr();
case 0:
return eagain();
default:
break;
}
}
// create event handle for overlapped i/o
intptr_t event;
if (!(event = WSACreateEvent()))
@ -86,7 +71,10 @@ __winsock_block(int64_t handle, uint32_t flags, int nonblock,
struct NtOverlapped overlap = {.hEvent = event};
bool32 ok = !StartSocketOp(handle, &overlap, &flags, arg);
if (!ok && WSAGetLastError() == kNtErrorIoPending) {
if (nonblock > 0) {
if (nonblock) {
// send() and sendto() shall not pass O_NONBLOCK along to here
// because winsock has a bug that causes CancelIoEx() to cause
// WSAGetOverlappedResult() to report errors when it succeeded
CancelIoEx(handle, &overlap);
got_eagain = true;
} else {