Remove callback from cosmoaudio API

Using callbacks is still problematic with cosmo_dlopen() due to the need
to restore the TLS register. So using callbacks is even more strict than
using signal handlers. We are better off introducing a cosmoaudio_poll()
function. It makes the API more UNIX-like. How bad could the latency be?
This commit is contained in:
Justine Tunney 2024-09-07 17:42:15 -07:00
parent d99f066114
commit d50d954a3c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
17 changed files with 433 additions and 158 deletions

View file

@ -186,7 +186,6 @@ if [ ! -x bin/x86_64-linux-cosmo-gcc ]; then
rm -f x86_64-gcc.zip
rm -f llvm.zip
mv bin/clang-19 bin/cosmo-clang
mv bin/clang-format bin/clang-format
fi
rm -f bin/*-cpp
rm -f bin/*-gcc-*

View file

@ -36,6 +36,7 @@ TOOL_VIZ_LIB_A_DIRECTDEPS = \
LIBC_RUNTIME \
LIBC_STDIO \
LIBC_STR \
LIBC_THREAD \
LIBC_SYSV \
LIBC_TESTLIB \
LIBC_TINYMATH \

View file

@ -43,6 +43,7 @@
#include "libc/str/str.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"
#include "libc/time.h"
#include "libc/x/x.h"
#include "tool/viz/lib/graphic.h"
@ -69,6 +70,7 @@ struct timespec magikarp_start_;
struct YCbCr {
bool yonly;
int cpu_count;
int magnums[8][4];
int lighting[6][4];
unsigned char transfer[2][256];
@ -165,6 +167,7 @@ void YCbCrInit(struct YCbCr **ycbcr, bool yonly, int swing, double gamma,
if (!*ycbcr)
*ycbcr = xcalloc(1, sizeof(struct YCbCr));
(*ycbcr)->yonly = yonly;
(*ycbcr)->cpu_count = __get_cpu_count();
bzero((*ycbcr)->magnums, sizeof((*ycbcr)->magnums));
bzero((*ycbcr)->lighting, sizeof((*ycbcr)->lighting));
YCbCrComputeCoefficients(swing, gamma, gamut, illuminant, (*ycbcr)->magnums,
@ -263,14 +266,32 @@ void YCbCrConvert(struct YCbCr *me, long yn, long xn,
const unsigned char Y[restrict yys][yxs], long cys, long cxs,
unsigned char Cb[restrict cys][cxs],
unsigned char Cr[restrict cys][cxs]) {
struct timespec ts = timespec_real();
struct timespec ts = timespec_mono();
if (!me->yonly) {
YCbCr2Rgb(yn, xn, RGB, yys, yxs, Y, cys, cxs, Cb, Cr, me->magnums,
me->lighting, me->transfer[pf10_]);
} else {
Y2Rgb(yn, xn, RGB, yys, yxs, Y, me->magnums, me->transfer[pf10_]);
}
ycbcr2rgb_latency_ = timespec_tomicros(timespec_sub(timespec_real(), ts));
ycbcr2rgb_latency_ = timespec_tomicros(timespec_sub(timespec_mono(), ts));
}
/*
 * Argument bundle for one GyaradosUint8() call.
 *
 * pthread start routines receive a single void pointer, so every
 * argument of the scaler call is packed here. The fields are forwarded
 * to GyaradosUint8() in declaration order, except that `src` sits
 * between the (syw, sxw) and (dyw, dxw) pairs and `dst` directly after
 * (dyw, dxw) in the actual call.
 */
struct YCbCr2RgbScalerThreadData {
  /* NOTE(review): presumably the allocated row/column extents of the
     source and destination planes -- confirm against GyaradosUint8. */
  long syw;
  long sxw;
  long dyw;
  long dxw;
  /* NOTE(review): presumably the destination and source sizes actually
     being scaled -- confirm against GyaradosUint8. */
  long dyn;
  long dxn;
  long syn;
  long sxn;
  /* Plane read from and plane written to; callers in this file pass the
     same buffer for both. */
  unsigned char *src;
  unsigned char *dst;
  /* Output clamp bounds; callers in this file pass 0 and 255. */
  int min;
  int max;
  /* Vertical and horizontal sampling solutions handed to the scaler. */
  struct SamplingSolution *cy;
  struct SamplingSolution *cx;
  /* Forwarded as the scaler's final flag; true for the Y plane only. */
  bool sharpen;
};
static void *YCbCr2RgbScalerThread(void *arg) {
struct YCbCr2RgbScalerThreadData *data =
(struct YCbCr2RgbScalerThreadData *)arg;
GyaradosUint8(data->syw, data->sxw, data->src, data->dyw, data->dxw,
data->dst, data->dyn, data->dxn, data->syn, data->sxn,
data->min, data->max, data->cy, data->cx, data->sharpen);
return NULL;
}
void YCbCr2RgbScaler(struct YCbCr *me, long dyn, long dxn,
@ -297,7 +318,7 @@ void YCbCr2RgbScaler(struct YCbCr *me, long dyn, long dxn,
Magkern2xY(cys, cxs, Cr, scyn, scxn), HALF(yyn), yxn,
HALF(cyn), scxn, syn / 2, sxn, pry, prx);
} else {
struct timespec ts = timespec_real();
struct timespec ts = timespec_mono();
magikarp_latency_ = timespec_tomicros(timespec_sub(ts, magikarp_start_));
yry = syn / dyn;
yrx = sxn / dxn;
@ -322,13 +343,83 @@ void YCbCr2RgbScaler(struct YCbCr *me, long dyn, long dxn,
sharpen(1, yys, yxs, (void *)Y, yyn, yxn);
if (pf9_)
unsharp(1, yys, yxs, (void *)Y, yyn, yxn);
GyaradosUint8(yys, yxs, Y, yys, yxs, Y, dyn, dxn, syn, sxn, 0, 255,
me->luma.cy, me->luma.cx, true);
GyaradosUint8(cys, cxs, Cb, cys, cxs, Cb, dyn, dxn, scyn, scxn, 0, 255,
me->chroma.cy, me->chroma.cx, false);
GyaradosUint8(cys, cxs, Cr, cys, cxs, Cr, dyn, dxn, scyn, scxn, 0, 255,
me->chroma.cy, me->chroma.cx, false);
gyarados_latency_ = timespec_tomicros(timespec_sub(timespec_real(), ts));
if (me->cpu_count < 6) {
GyaradosUint8(yys, yxs, Y, yys, yxs, Y, dyn, dxn, syn, sxn, 0, 255,
me->luma.cy, me->luma.cx, true);
GyaradosUint8(cys, cxs, Cb, cys, cxs, Cb, dyn, dxn, scyn, scxn, 0, 255,
me->chroma.cy, me->chroma.cx, false);
GyaradosUint8(cys, cxs, Cr, cys, cxs, Cr, dyn, dxn, scyn, scxn, 0, 255,
me->chroma.cy, me->chroma.cx, false);
} else {
pthread_t threads[3];
struct YCbCr2RgbScalerThreadData thread_data[3];
// Set up thread data for Y plane.
thread_data[0] = (struct YCbCr2RgbScalerThreadData){
.syw = yys,
.sxw = yxs,
.dyw = yys,
.dxw = yxs,
.dyn = dyn,
.dxn = dxn,
.syn = syn,
.sxn = sxn,
.src = (unsigned char *)Y,
.dst = (unsigned char *)Y,
.min = 0,
.max = 255,
.cy = me->luma.cy,
.cx = me->luma.cx,
.sharpen = true,
};
// Set up thread data for Cb plane.
thread_data[1] = (struct YCbCr2RgbScalerThreadData){
.syw = cys,
.sxw = cxs,
.dyw = cys,
.dxw = cxs,
.dyn = dyn,
.dxn = dxn,
.syn = scyn,
.sxn = scxn,
.src = (unsigned char *)Cb,
.dst = (unsigned char *)Cb,
.min = 0,
.max = 255,
.cy = me->chroma.cy,
.cx = me->chroma.cx,
.sharpen = false,
};
// Set up thread data for Cr plane.
thread_data[2] = (struct YCbCr2RgbScalerThreadData){
.syw = cys,
.sxw = cxs,
.dyw = cys,
.dxw = cxs,
.dyn = dyn,
.dxn = dxn,
.syn = scyn,
.sxn = scxn,
.src = (unsigned char *)Cr,
.dst = (unsigned char *)Cr,
.min = 0,
.max = 255,
.cy = me->chroma.cy,
.cx = me->chroma.cx,
.sharpen = false,
};
// Dispatch threads.
for (int i = 0; i < 3; i++)
pthread_create(&threads[i], NULL, YCbCr2RgbScalerThread,
&thread_data[i]);
for (int i = 3; i--;)
pthread_join(threads[i], NULL);
}
gyarados_latency_ = timespec_tomicros(timespec_sub(timespec_mono(), ts));
YCbCrConvert(me, dyn, dxn, RGB, yys, yxs, Y, cys, cxs, Cb, Cr);
INFOF("done");
}
@ -383,7 +474,7 @@ void *YCbCr2RgbScale(long dyn, long dxn,
CHECK_LE(cyn, cys);
CHECK_LE(cxn, cxs);
INFOF("magikarp2x");
magikarp_start_ = timespec_real();
magikarp_start_ = timespec_mono();
minyys = MAX(ceil(syn), MAX(yyn, ceil(dyn * pry)));
minyxs = MAX(ceil(sxn), MAX(yxn, ceil(dxn * prx)));
mincys = MAX(cyn, ceil(dyn * pry));

View file

@ -1130,7 +1130,7 @@ static bool ShouldDraw(void) {
static struct timespec next;
if (!isdragging)
return true;
now = timespec_real();
now = timespec_mono();
if (timespec_cmp(now, next) > 0 && !HasPendingInput()) {
next = timespec_add(now, timespec_frommicros(1. / 24 * 1e6));
return true;

View file

@ -35,14 +35,14 @@ void *worker(void *arg) {
}
/*
 * Times how long it takes to spawn `n` worker threads and join them
 * all, then prints the elapsed time in microseconds.
 *
 * The interval is measured on the monotonic clock so that wall-clock
 * adjustments made while the benchmark runs cannot skew the result.
 * Only one `start` and one `end` sample is declared; keeping both the
 * realtime and monotonic declarations in the same scope would be a
 * redefinition error.
 *
 * @param n  number of threads to create
 */
void test(int n) {
  struct timespec start = timespec_mono();
  pthread_t *th = malloc(sizeof(pthread_t) * n);
  for (int i = 0; i < n; ++i)
    pthread_create(th + i, 0, worker, 0);
  for (int i = 0; i < n; ++i)
    pthread_join(th[i], 0);
  free(th);
  struct timespec end = timespec_mono();
  printf("%2d threads * %d allocs = %ld us\n", n, ALLOCATIONS,
         timespec_tomicros(timespec_sub(end, start)));
}

View file

@ -335,10 +335,11 @@ static long Index(long y, long x) {
/*
 * Rate-limits the caller to one invocation per frame period.
 *
 * The frame period is 1/fps seconds (`fps` is defined outside this
 * block). If less than one period has elapsed since the previous call,
 * the function sleeps for the remainder of that period. Both the
 * timestamp and the sleep use the monotonic clock, and there is exactly
 * one sample and one sleep per call: sampling twice or sleeping on two
 * different clocks would double the delay.
 *
 * `last` records the timestamp taken before any sleep, not the time at
 * which the function returns.
 */
static void PreventBufferbloat(void) {
  struct timespec now, rate;
  static struct timespec last;
  now = timespec_mono();
  rate = timespec_frommicros(1. / fps * 1e6);
  if (timespec_cmp(timespec_sub(now, last), rate) < 0) {
    timespec_sleep(CLOCK_MONOTONIC,
                   timespec_sub(rate, timespec_sub(now, last)));
  }
  last = now;
}

View file

@ -40,6 +40,7 @@
#include "libc/calls/ucontext.h"
#include "libc/ctype.h"
#include "libc/cxxabi.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/fmt/itoa.h"
@ -56,7 +57,9 @@
#include "libc/nexgen32e/bench.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/nt/console.h"
#include "libc/nt/enum/threadpriority.h"
#include "libc/nt/runtime.h"
#include "libc/nt/thread.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/sock.h"
#include "libc/sock/struct/pollfd.h"
@ -1398,6 +1401,10 @@ static void TryToOpenFrameBuffer(void) {
int main(int argc, char *argv[]) {
sigset_t wut;
ShowCrashReports();
#ifdef __x86_64__
if (IsWindows())
SetThreadPriority(GetCurrentThread(), kNtThreadPriorityHighest);
#endif
gamma_ = 2.4;
volscale_ = 1.f;
dither_ = true;