Share file offset across processes

This change ensures that if a file descriptor for an open disk file gets
shared by multiple processes within a process tree, then lseek() changes
will be visible across processes, and read() / write() are synchronized.
Note this only applies to Windows, because UNIX kernels already do this.
This commit is contained in:
Justine Tunney 2024-08-03 01:24:46 -07:00
parent a80ab3f8fe
commit 761c6ad615
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
15 changed files with 256 additions and 63 deletions

View file

@ -17,13 +17,14 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/internal.h"
#include "libc/intrin/fds.h"
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/fds.h"
#include "libc/intrin/weaken.h"
#include "libc/nt/enum/filetype.h"
#include "libc/nt/files.h"
#include "libc/nt/runtime.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/zipos.internal.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/consts/o.h"
@ -64,5 +65,7 @@ textwindows int sys_close_nt(int fd, int fildes) {
default:
break;
}
if (f->shared && !f->isdup)
munmap(f->shared, sizeof(struct Cursor));
return CloseHandle(f->handle) ? 0 : __winerr();
}

View file

@ -82,6 +82,8 @@ static textwindows int sys_dup_nt_impl(int oldfd, int newfd, int flags,
g_fds.p[newfd] = g_fds.p[oldfd];
g_fds.p[newfd].handle = handle;
g_fds.p[newfd].isdup = true;
g_fds.p[oldfd].isdup = true; // TODO(jart): is it possible to avoid leak?
if (flags & _O_CLOEXEC) {
g_fds.p[newfd].flags |= _O_CLOEXEC;
} else {

View file

@ -20,13 +20,13 @@
#include "libc/calls/calls.h"
#include "libc/calls/createfileflags.internal.h"
#include "libc/calls/internal.h"
#include "libc/intrin/fds.h"
#include "libc/calls/struct/flock.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/calls/wincrash.internal.h"
#include "libc/errno.h"
#include "libc/intrin/fds.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/weaken.h"
#include "libc/limits.h"
@ -151,7 +151,7 @@ static textwindows int sys_fcntl_nt_lock(struct Fd *f, int fd, int cmd,
case SEEK_SET:
break;
case SEEK_CUR:
off = f->pointer + off;
off = f->shared->pointer + off;
break;
case SEEK_END: {
int64_t size;
@ -351,9 +351,14 @@ textwindows int sys_fcntl_nt(int fd, int cmd, uintptr_t arg) {
}
rc = 0;
} else if (cmd == F_SETLK || cmd == F_SETLKW || cmd == F_GETLK) {
pthread_mutex_lock(&g_locks.mu);
rc = sys_fcntl_nt_lock(g_fds.p + fd, fd, cmd, arg);
pthread_mutex_unlock(&g_locks.mu);
struct Fd *f = g_fds.p + fd;
if (f->shared) {
pthread_mutex_lock(&g_locks.mu);
rc = sys_fcntl_nt_lock(f, fd, cmd, arg);
pthread_mutex_unlock(&g_locks.mu);
} else {
rc = ebadf();
}
} else if (cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC) {
rc = sys_fcntl_nt_dupfd(fd, cmd, arg);
} else {

View file

@ -31,7 +31,7 @@ static textwindows int64_t GetPosition(struct Fd *f, int whence) {
case SEEK_SET:
return 0;
case SEEK_CUR:
return f->pointer;
return f->shared->pointer;
case SEEK_END: {
struct NtByHandleFileInformation wst;
if (!GetFileInformationByHandle(f->handle, &wst)) {
@ -67,11 +67,14 @@ textwindows int64_t sys_lseek_nt(int fd, int64_t offset, int whence) {
} else if (__isfdkind(fd, kFdFile)) {
struct Fd *f = g_fds.p + fd;
int filetype = GetFileType(f->handle);
if (filetype != kNtFileTypePipe && filetype != kNtFileTypeChar) {
if (filetype != kNtFileTypePipe && //
filetype != kNtFileTypeChar && //
f->shared) {
int64_t res;
if ((res = Seek(f, offset, whence)) != -1) {
f->pointer = res;
}
__fd_lock(f);
if ((res = Seek(f, offset, whence)) != -1)
f->shared->pointer = res;
__fd_unlock(f);
return res;
} else {
return espipe();

View file

@ -138,6 +138,7 @@ static textwindows int sys_open_nt_file(int dirfd, const char *file,
int64_t handle;
if ((handle = sys_open_nt_impl(dirfd, file, flags, mode,
kNtFileFlagOverlapped)) != -1) {
g_fds.p[fd].shared = __cursor_new();
g_fds.p[fd].handle = handle;
g_fds.p[fd].kind = kFdFile;
g_fds.p[fd].flags = flags;
@ -170,14 +171,14 @@ static textwindows int sys_open_nt_no_handle(int fd, int flags, int mode,
static textwindows int sys_open_nt_dup(int fd, int flags, int mode, int oldfd) {
int64_t handle;
if (!__isfdopen(oldfd)) {
if (!__isfdopen(oldfd))
return enoent();
}
if (DuplicateHandle(GetCurrentProcess(), g_fds.p[oldfd].handle,
GetCurrentProcess(), &handle, 0, true,
kNtDuplicateSameAccess)) {
g_fds.p[fd] = g_fds.p[oldfd];
g_fds.p[fd].handle = handle;
g_fds.p[fd].isdup = true;
g_fds.p[fd].mode = mode;
if (!sys_fcntl_nt_setfl(fd, flags)) {
return fd;

View file

@ -19,9 +19,9 @@
#include "libc/calls/createfileflags.internal.h"
#include "libc/calls/internal.h"
#include "libc/calls/sig.internal.h"
#include "libc/intrin/fds.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/fds.h"
#include "libc/intrin/weaken.h"
#include "libc/nt/enum/filetype.h"
#include "libc/nt/errors.h"
@ -51,20 +51,19 @@ sys_readwrite_nt(int fd, void *data, size_t size, ssize_t offset,
uint32_t exchanged;
struct Fd *f = g_fds.p + fd;
// win32 i/o apis generally take 32-bit values thus we implicitly
// truncate outrageously large sizes. linux actually does it too!
size = MIN(size, 0x7ffff000);
// pread() and pwrite() perform an implicit lseek() operation, so
// similar to the lseek() system call, they too raise ESPIPE when
// operating on a non-seekable file.
bool pwriting = offset != -1;
bool seekable =
(f->kind == kFdFile && GetFileType(handle) == kNtFileTypeDisk) ||
f->kind == kFdDevNull || f->kind == kFdDevRandom;
if (pwriting && !seekable) {
bool isdisk = f->kind == kFdFile && GetFileType(handle) == kNtFileTypeDisk;
bool seekable = isdisk || f->kind == kFdDevNull || f->kind == kFdDevRandom;
if (pwriting && !seekable)
return espipe();
}
// determine if we need to lock a file descriptor across processes
bool locked = isdisk && !pwriting && f->shared;
if (locked)
__fd_lock(f);
// when a file is opened in overlapped mode win32 requires that we
// take over full responsibility for managing our own file pointer
@ -72,8 +71,8 @@ sys_readwrite_nt(int fd, void *data, size_t size, ssize_t offset,
// the sense that it behaves so differently from linux, that using
// win32 i/o required more compatibilty toil than doing it by hand
if (!pwriting) {
if (seekable) {
offset = f->pointer;
if (seekable && f->shared) {
offset = f->shared->pointer;
} else {
offset = 0;
}
@ -82,8 +81,11 @@ sys_readwrite_nt(int fd, void *data, size_t size, ssize_t offset,
RestartOperation:
bool eagained = false;
// check for signals and cancelation
if (_check_cancel() == -1)
if (_check_cancel() == -1) {
if (locked)
__fd_unlock(f);
return -1; // ECANCELED
}
if (_weaken(__sig_get) && (sig = _weaken(__sig_get)(waitmask))) {
goto HandleInterrupt;
}
@ -114,16 +116,16 @@ RestartOperation:
}
ok = true;
}
if (ok) {
if (ok)
ok = GetOverlappedResult(handle, &overlap, &exchanged, true);
}
CloseHandle(overlap.hEvent);
// if i/o succeeded then return its result
if (ok) {
if (!pwriting && seekable) {
f->pointer = offset + exchanged;
}
if (!pwriting && seekable && f->shared)
f->shared->pointer = offset + exchanged;
if (locked)
__fd_unlock(f);
return exchanged;
}
@ -131,23 +133,32 @@ RestartOperation:
if (GetLastError() == kNtErrorOperationAborted) {
// raise EAGAIN if it's due to O_NONBLOCK mmode
if (eagained) {
if (locked)
__fd_unlock(f);
return eagain();
}
// otherwise it must be due to a kill() via __sig_cancel()
if (_weaken(__sig_relay) && (sig = _weaken(__sig_get)(waitmask))) {
HandleInterrupt:
if (locked)
__fd_unlock(f);
int handler_was_called = _weaken(__sig_relay)(sig, SI_KERNEL, waitmask);
if (_check_cancel() == -1)
return -1; // possible if we SIGTHR'd
if (locked)
__fd_lock(f);
// read() is @restartable unless non-SA_RESTART hands were called
if (!(handler_was_called & SIG_HANDLED_NO_RESTART)) {
if (!(handler_was_called & SIG_HANDLED_NO_RESTART))
goto RestartOperation;
}
}
if (locked)
__fd_unlock(f);
return eintr();
}
// read() and write() have generally different error-handling paths
if (locked)
__fd_unlock(f);
return -2;
}

View file

@ -16,13 +16,13 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/fds.h"
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/ttydefaults.h"
#include "libc/dce.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/extend.h"
#include "libc/intrin/fds.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/nomultics.h"
#include "libc/intrin/pushpop.h"
@ -40,6 +40,7 @@
#include "libc/sock/sock.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/thread.h"
#define OPEN_MAX 16
@ -156,12 +157,29 @@ textstartup void __init_fds(int argc, char **argv, char **envp) {
f->kind = kind;
f->flags = flags;
f->mode = mode;
f->pointer = pointer;
f->type = type;
f->family = family;
f->protocol = protocol;
atomic_store_explicit(&fds->f, fd + 1, memory_order_relaxed);
//
// - v1 abi: This field was originally the file pointer.
//
// - v2 abi: This field is the negated shared memory address.
//
if (f->kind == kFdFile) {
if (pointer < 0) {
f->shared = (struct Cursor *)(uintptr_t)-pointer;
} else if ((f->shared = __cursor_new())) {
f->shared->pointer = pointer;
}
}
}
}
for (int i = 0; i < 3; ++i) {
struct Fd *f = fds->p + i;
if (f->kind == kFdFile && !f->shared)
f->shared = __cursor_new();
}
}
}

View file

@ -1,5 +1,7 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_FD_INTERNAL_H_
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_FD_INTERNAL_H_
#include "libc/atomic.h"
#include "libc/thread/thread.h"
COSMOPOLITAN_C_START_
#define kFdEmpty 0
@ -13,19 +15,25 @@ COSMOPOLITAN_C_START_
#define kFdDevNull 9
#define kFdDevRandom 10
struct Cursor {
pthread_mutex_t lock;
long pointer;
};
struct Fd {
char kind;
bool isdup;
bool isbound;
unsigned flags;
unsigned mode;
long handle;
long pointer;
int family;
int type;
int protocol;
unsigned rcvtimeo; /* millis; 0 means wait forever */
unsigned sndtimeo; /* millis; 0 means wait forever */
void *connect_op;
struct Cursor *shared;
};
struct Fds {
@ -34,5 +42,9 @@ struct Fds {
struct Fd *p, *e;
};
void __fd_lock(struct Fd *);
void __fd_unlock(struct Fd *);
struct Cursor *__cursor_new(void);
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_CALLS_STRUCT_FD_INTERNAL_H_ */

View file

@ -17,6 +17,8 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/state.internal.h"
#include "libc/intrin/fds.h"
#include "libc/runtime/runtime.h"
#include "libc/thread/thread.h"
void __fds_lock(void) {
@ -26,3 +28,23 @@ void __fds_lock(void) {
void __fds_unlock(void) {
pthread_mutex_unlock(&__fds_lock_obj);
}
void __fd_lock(struct Fd *f) {
pthread_mutex_lock(&f->shared->lock);
}
void __fd_unlock(struct Fd *f) {
pthread_mutex_unlock(&f->shared->lock);
}
struct Cursor *__cursor_new(void) {
struct Cursor *c;
if ((c = _mapshared(sizeof(struct Cursor)))) {
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
pthread_mutex_init(&c->lock, &attr);
pthread_mutexattr_destroy(&attr);
}
return c;
}

View file

@ -17,9 +17,9 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/intrin/fds.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/fds.h"
#include "libc/intrin/strace.h"
#include "libc/mem/mem.h"
#include "libc/nt/files.h"
@ -151,7 +151,12 @@ textwindows char *__describe_fds(const struct Fd *fds, size_t fdslen,
*p++ = '_';
p = FormatInt64(p, f->mode);
*p++ = '_';
p = FormatInt64(p, f->pointer);
//
// - v1 abi: This field was originally the file pointer.
//
// - v2 abi: This field is the negated shared memory address.
//
p = FormatInt64(p, -(uintptr_t)f->shared);
*p++ = '_';
p = FormatInt64(p, f->type);
*p++ = '_';

View file

@ -62,9 +62,10 @@ static dontinline textwindows ssize_t sys_sendfile_nt(
int outfd, int infd, int64_t *opt_in_out_inoffset, uint32_t uptobytes) {
ssize_t rc;
uint32_t flags = 0;
bool locked = false;
int64_t ih, oh, eof, offset;
struct NtByHandleFileInformation wst;
if (!__isfdkind(infd, kFdFile))
if (!__isfdkind(infd, kFdFile) || !g_fds.p[infd].shared)
return ebadf();
if (!__isfdkind(outfd, kFdSocket))
return ebadf();
@ -73,7 +74,9 @@ static dontinline textwindows ssize_t sys_sendfile_nt(
if (opt_in_out_inoffset) {
offset = *opt_in_out_inoffset;
} else {
offset = g_fds.p[infd].pointer;
locked = true;
__fd_lock(&g_fds.p[infd]);
offset = g_fds.p[infd].shared->pointer;
}
if (GetFileInformationByHandle(ih, &wst)) {
// TransmitFile() returns EINVAL if `uptobytes` goes past EOF.
@ -82,9 +85,10 @@ static dontinline textwindows ssize_t sys_sendfile_nt(
uptobytes = eof - offset;
}
} else {
if (locked)
__fd_unlock(&g_fds.p[infd]);
return ebadf();
}
BLOCK_SIGNALS;
struct NtOverlapped ov = {.hEvent = WSACreateEvent(), .Pointer = offset};
cosmo_once(&g_transmitfile.once, transmitfile_init);
if (g_transmitfile.lpTransmitFile(oh, ih, uptobytes, 0, &ov, 0, 0) ||
@ -95,7 +99,7 @@ static dontinline textwindows ssize_t sys_sendfile_nt(
if (opt_in_out_inoffset) {
*opt_in_out_inoffset = offset + rc;
} else {
g_fds.p[infd].pointer = offset + rc;
g_fds.p[infd].shared->pointer = offset + rc;
}
} else {
rc = __winsockerr();
@ -103,8 +107,9 @@ static dontinline textwindows ssize_t sys_sendfile_nt(
} else {
rc = __winsockerr();
}
if (locked)
__fd_unlock(&g_fds.p[infd]);
WSACloseEvent(ov.hEvent);
ALLOW_SIGNALS;
return rc;
}
@ -186,7 +191,9 @@ ssize_t sendfile(int outfd, int infd, int64_t *opt_in_out_inoffset,
} else if (IsFreebsd() || IsXnu()) {
rc = sys_sendfile_bsd(outfd, infd, opt_in_out_inoffset, uptobytes);
} else if (IsWindows()) {
BLOCK_SIGNALS;
rc = sys_sendfile_nt(outfd, infd, opt_in_out_inoffset, uptobytes);
ALLOW_SIGNALS;
} else {
rc = enosys();
}