mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-01 03:53:33 +00:00
416fd86676
- Emulator can now test the αcτµαlly pδrταblε εxεcµταblε bootloader - Whipped up a webserver named redbean. It services 150k requests per second on a single core. Bundling assets inside zip enables extremely fast serving for two reasons. The first is that zip central directory lookups go faster than stat() system calls. The second is that both zip and gzip content-encoding use DEFLATE, therefore, compressed responses can be served via the sendfile() system call which does an in-kernel copy directly from the zip executable structure. Also note that red bean zip executables can be deployed easily to all platforms, since these native executables work on Linux, Mac, BSD, and Windows. - Address sanitizer now works very well
390 lines
11 KiB
C
390 lines
11 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ This program is free software; you can redistribute it and/or modify │
|
|
│ it under the terms of the GNU General Public License as published by │
|
|
│ the Free Software Foundation; version 2 of the License. │
|
|
│ │
|
|
│ This program is distributed in the hope that it will be useful, but │
|
|
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
|
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
|
│ General Public License for more details. │
|
|
│ │
|
|
│ You should have received a copy of the GNU General Public License │
|
|
│ along with this program; if not, write to the Free Software │
|
|
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
|
│ 02110-1301 USA │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/macros.h"
|
|
#include "libc/math.h"
|
|
#include "tool/build/lib/cvt.h"
|
|
#include "tool/build/lib/endian.h"
|
|
#include "tool/build/lib/machine.h"
|
|
#include "tool/build/lib/memory.h"
|
|
#include "tool/build/lib/modrm.h"
|
|
#include "tool/build/lib/throw.h"
|
|
|
|
/*
 * Base selectors for the conversion opcode groups handled by OpCvt().
 * Each group is four apart so the low two bits stay free for the value
 * that OpCvt() ORs in from Rep(rde) and Osz(rde).
 */
enum {
  kOpCvt0f2a = 0,
  kOpCvtt0f2c = 4,
  kOpCvt0f2d = 8,
  kOpCvt0f5a = 12,
  kOpCvt0f5b = 16,
  kOpCvt0fE6 = 20
};
|
|
|
|
static double SseRoundDouble(struct Machine *m, double x) {
|
|
switch (m->sse.rc) {
|
|
case 0:
|
|
return rint(x);
|
|
case 1:
|
|
return floor(x);
|
|
case 2:
|
|
return ceil(x);
|
|
case 3:
|
|
return trunc(x);
|
|
default:
|
|
unreachable;
|
|
}
|
|
}
|
|
|
|
/**
 * Converts the low float of the xmm/m32 operand to an integer using
 * C truncation and stores it in the general register picked by ModR/M.
 *
 * Without REX.W only the low 32 bits of the result are kept, so the
 * 64-bit register write leaves the upper half zero.
 */
static void OpGdqpWssCvttss2si(struct Machine *m, uint32_t rde) {
  float src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead4(m, rde), sizeof(src));
  result = (int64_t)src;
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
|
|
|
|
/**
 * Converts the low double of the xmm/m64 operand to an integer using
 * C truncation and stores it in the general register picked by ModR/M.
 *
 * Without REX.W only the low 32 bits of the result are kept, so the
 * 64-bit register write leaves the upper half zero.
 */
static void OpGdqpWsdCvttsd2si(struct Machine *m, uint32_t rde) {
  double src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  result = (int64_t)src;
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
|
|
|
|
/**
 * Converts the low float of the xmm/m32 operand to an integer, rounding
 * according to the machine's current SSE rounding mode, and stores it
 * in the general register picked by ModR/M.
 *
 * The rounding goes through SseRoundDouble() so that this instruction
 * honors m->sse.rc the same way the double precision and packed forms
 * do, rather than always rounding like rintf(). Widening float to
 * double is exact, so rounding the widened value yields the same
 * integer the float would round to.
 *
 * Without REX.W only the low 32 bits of the result are kept.
 */
static void OpGdqpWssCvtss2si(struct Machine *m, uint32_t rde) {
  float f;
  int64_t n;
  memcpy(&f, GetModrmRegisterXmmPointerRead4(m, rde), 4);
  n = SseRoundDouble(m, f);
  if (!Rexw(rde)) n &= 0xffffffff;
  Write64(RegRexrReg(m, rde), n);
}
|
|
|
|
/**
 * Converts the low double of the xmm/m64 operand to an integer, rounded
 * by SseRoundDouble(), and stores it in the general register picked by
 * ModR/M.
 *
 * Without REX.W only the low 32 bits of the result are kept.
 */
static void OpGdqpWsdCvtsd2si(struct Machine *m, uint32_t rde) {
  double src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  result = (int64_t)SseRoundDouble(m, src);
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
|
|
|
|
/**
 * Converts a signed integer operand to float and writes it to the low
 * four bytes of the destination xmm register.
 *
 * The source is read as a signed 64-bit value when REX.W is set and as
 * a signed 32-bit value otherwise. The remaining bytes of the
 * destination register are not touched.
 */
static void OpVssEdqpCvtsi2ss(struct Machine *m, uint32_t rde) {
  float f;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  f = n;
  memcpy(XmmRexrReg(m, rde), &f, 4);
}
|
|
|
|
/**
 * Converts a signed integer operand to double and writes it to the low
 * eight bytes of the destination xmm register.
 *
 * The source is read as a signed 64-bit value when REX.W is set and as
 * a signed 32-bit value otherwise. The remaining bytes of the
 * destination register are not touched.
 */
static void OpVsdEdqpCvtsi2sd(struct Machine *m, uint32_t rde) {
  double d;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  d = n;
  memcpy(XmmRexrReg(m, rde), &d, 8);
}
|
|
|
|
/**
 * Converts two signed 32-bit integers from the mm/m64 operand into two
 * floats and writes them to the low eight bytes of the destination xmm
 * register.
 */
static void OpVpsQpiCvtpi2ps(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float out[2];
  uint8_t *src;
  src = GetModrmRegisterMmPointerRead8(m, rde);
  for (lane = 0; lane < 2; ++lane) {
    // reinterpret each dword as signed before converting
    out[lane] = (int32_t)Read32(src + lane * 4);
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
/**
 * Converts two signed 32-bit integers from the mm/m64 operand into two
 * doubles and writes all sixteen bytes of the destination xmm register.
 */
static void OpVpdQpiCvtpi2pd(struct Machine *m, uint32_t rde) {
  unsigned lane;
  double out[2];
  uint8_t *src;
  src = GetModrmRegisterMmPointerRead8(m, rde);
  for (lane = 0; lane < 2; ++lane) {
    // reinterpret each dword as signed before converting
    out[lane] = (int32_t)Read32(src + lane * 4);
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
static void OpPpiWpsqCvtps2pi(struct Machine *m, uint32_t rde) {
|
|
unsigned i;
|
|
float f[2];
|
|
int32_t n[2];
|
|
memcpy(f, GetModrmRegisterXmmPointerRead8(m, rde), 8);
|
|
switch (m->sse.rc) {
|
|
case 0:
|
|
for (i = 0; i < 2; ++i) n[i] = rintf(f[i]);
|
|
break;
|
|
case 1:
|
|
for (i = 0; i < 2; ++i) n[i] = floorf(f[i]);
|
|
break;
|
|
case 2:
|
|
for (i = 0; i < 2; ++i) n[i] = ceilf(f[i]);
|
|
break;
|
|
case 3:
|
|
for (i = 0; i < 2; ++i) n[i] = truncf(f[i]);
|
|
break;
|
|
default:
|
|
unreachable;
|
|
}
|
|
Write32(MmReg(m, rde) + 0, n[0]);
|
|
Write32(MmReg(m, rde) + 4, n[1]);
|
|
}
|
|
|
|
/**
 * Converts the two low floats of the xmm/m64 operand to signed 32-bit
 * integers using C truncation and stores them in the destination mm
 * register.
 */
static void OpPpiWpsqCvttps2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  int32_t out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = (int32_t)src[lane];
  }
  for (lane = 0; lane < 2; ++lane) {
    Write32(MmReg(m, rde) + lane * 4, out[lane]);
  }
}
|
|
|
|
/**
 * Converts the two doubles of the xmm/m128 operand to signed 32-bit
 * integers, rounded by SseRoundDouble(), and stores them in the
 * destination mm register.
 */
static void OpPpiWpdCvtpd2pi(struct Machine *m, uint32_t rde) {
  double src[2];
  int32_t lo, hi;
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  lo = SseRoundDouble(m, src[0]);
  hi = SseRoundDouble(m, src[1]);
  Write32(MmReg(m, rde) + 0, lo);
  Write32(MmReg(m, rde) + 4, hi);
}
|
|
|
|
/**
 * Converts the two doubles of the xmm/m128 operand to signed 32-bit
 * integers using C truncation and stores them in the destination mm
 * register.
 */
static void OpPpiWpdCvttpd2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  double src[2];
  int32_t out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = (int32_t)src[lane];
  }
  for (lane = 0; lane < 2; ++lane) {
    Write32(MmReg(m, rde) + lane * 4, out[lane]);
  }
}
|
|
|
|
/**
 * Widens the two low floats of the xmm/m64 operand to doubles and
 * writes all sixteen bytes of the destination xmm register.
 */
static void OpVpdWpsCvtps2pd(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  double out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = src[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
/**
 * Narrows the two doubles of the xmm/m128 operand to floats and writes
 * them to the low half of the destination xmm register.
 *
 * The legacy SSE form of CVTPD2PS zeroes bits 127:64 of the
 * destination, so the whole 16-byte register is written with the upper
 * two lanes cleared instead of leaving stale data there.
 */
static void OpVpsWpdCvtpd2ps(struct Machine *m, uint32_t rde) {
  float f[4] = {0};  // lanes 2 and 3 stay +0.0f, i.e. all-zero bits
  double d[2];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  f[0] = d[0];
  f[1] = d[1];
  memcpy(XmmRexrReg(m, rde), f, 16);
}
|
|
|
|
/**
 * Narrows the low double of the xmm/m64 operand to a float and writes
 * it to the low four bytes of the destination xmm register; the other
 * destination bytes are not touched.
 */
static void OpVssWsdCvtsd2ss(struct Machine *m, uint32_t rde) {
  double wide;
  float narrow;
  memcpy(&wide, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(wide));
  narrow = (float)wide;
  memcpy(XmmRexrReg(m, rde), &narrow, sizeof(narrow));
}
|
|
|
|
/**
 * Widens the low float of the xmm/m32 operand to a double and writes
 * it to the low eight bytes of the destination xmm register; the other
 * destination bytes are not touched.
 */
static void OpVsdWssCvtss2sd(struct Machine *m, uint32_t rde) {
  float narrow;
  double wide;
  memcpy(&narrow, GetModrmRegisterXmmPointerRead4(m, rde), sizeof(narrow));
  wide = narrow;
  memcpy(XmmRexrReg(m, rde), &wide, sizeof(wide));
}
|
|
|
|
/**
 * Converts four signed 32-bit integers from the xmm/m128 operand to
 * floats and writes all sixteen bytes of the destination xmm register.
 */
static void OpVpsWdqCvtdq2ps(struct Machine *m, uint32_t rde) {
  unsigned lane;
  int32_t src[4];
  float out[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    out[lane] = (float)src[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
/**
 * Converts the two low signed 32-bit integers of the xmm/m64 operand
 * to doubles and writes all sixteen bytes of the destination xmm
 * register.
 */
static void OpVpdWdqCvtdq2pd(struct Machine *m, uint32_t rde) {
  int32_t src[2];
  double out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  out[0] = src[0];
  out[1] = src[1];
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
/**
 * Converts four floats from the xmm/m128 operand to signed 32-bit
 * integers using C truncation and writes all sixteen bytes of the
 * destination xmm register.
 */
static void OpVdqWpsCvttps2dq(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[4];
  int32_t out[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    out[lane] = (int32_t)src[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
|
|
|
|
static void OpVdqWpsCvtps2dq(struct Machine *m, uint32_t rde) {
|
|
unsigned i;
|
|
float f[4];
|
|
int32_t n[4];
|
|
memcpy(f, GetModrmRegisterXmmPointerRead16(m, rde), 16);
|
|
switch (m->sse.rc) {
|
|
case 0:
|
|
for (i = 0; i < 4; ++i) n[i] = rintf(f[i]);
|
|
break;
|
|
case 1:
|
|
for (i = 0; i < 4; ++i) n[i] = floorf(f[i]);
|
|
break;
|
|
case 2:
|
|
for (i = 0; i < 4; ++i) n[i] = ceilf(f[i]);
|
|
break;
|
|
case 3:
|
|
for (i = 0; i < 4; ++i) n[i] = truncf(f[i]);
|
|
break;
|
|
default:
|
|
unreachable;
|
|
}
|
|
memcpy(XmmRexrReg(m, rde), n, 16);
|
|
}
|
|
|
|
/**
 * Converts the two doubles of the xmm/m128 operand to signed 32-bit
 * integers using C truncation and writes them to the low half of the
 * destination xmm register.
 *
 * The legacy SSE form of CVTTPD2DQ zeroes bits 127:64 of the
 * destination, so the whole 16-byte register is written with the upper
 * two lanes cleared instead of leaving stale data there.
 */
static void OpVdqWpdCvttpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4] = {0};  // lanes 2 and 3 remain zero
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = d[i];
  memcpy(XmmRexrReg(m, rde), n, 16);
}
|
|
|
|
/**
 * Converts the two doubles of the xmm/m128 operand to signed 32-bit
 * integers, rounded by SseRoundDouble(), and writes them to the low
 * half of the destination xmm register.
 *
 * The legacy SSE form of CVTPD2DQ zeroes bits 127:64 of the
 * destination, so the whole 16-byte register is written with the upper
 * two lanes cleared instead of leaving stale data there.
 */
static void OpVdqWpdCvtpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4] = {0};  // lanes 2 and 3 remain zero
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = SseRoundDouble(m, d[i]);
  memcpy(XmmRexrReg(m, rde), n, 16);
}
|
|
|
|
static void OpCvt(struct Machine *m, uint32_t rde, unsigned long op) {
|
|
switch (op | Rep(rde) | Osz(rde)) {
|
|
case kOpCvt0f2a + 0:
|
|
OpVpsQpiCvtpi2ps(m, rde);
|
|
break;
|
|
case kOpCvt0f2a + 1:
|
|
OpVpdQpiCvtpi2pd(m, rde);
|
|
break;
|
|
case kOpCvt0f2a + 2:
|
|
OpVsdEdqpCvtsi2sd(m, rde);
|
|
break;
|
|
case kOpCvt0f2a + 3:
|
|
OpVssEdqpCvtsi2ss(m, rde);
|
|
break;
|
|
case kOpCvtt0f2c + 0:
|
|
OpPpiWpsqCvttps2pi(m, rde);
|
|
break;
|
|
case kOpCvtt0f2c + 1:
|
|
OpPpiWpdCvttpd2pi(m, rde);
|
|
break;
|
|
case kOpCvtt0f2c + 2:
|
|
OpGdqpWsdCvttsd2si(m, rde);
|
|
break;
|
|
case kOpCvtt0f2c + 3:
|
|
OpGdqpWssCvttss2si(m, rde);
|
|
break;
|
|
case kOpCvt0f2d + 0:
|
|
OpPpiWpsqCvtps2pi(m, rde);
|
|
break;
|
|
case kOpCvt0f2d + 1:
|
|
OpPpiWpdCvtpd2pi(m, rde);
|
|
break;
|
|
case kOpCvt0f2d + 2:
|
|
OpGdqpWsdCvtsd2si(m, rde);
|
|
break;
|
|
case kOpCvt0f2d + 3:
|
|
OpGdqpWssCvtss2si(m, rde);
|
|
break;
|
|
case kOpCvt0f5a + 0:
|
|
OpVpdWpsCvtps2pd(m, rde);
|
|
break;
|
|
case kOpCvt0f5a + 1:
|
|
OpVpsWpdCvtpd2ps(m, rde);
|
|
break;
|
|
case kOpCvt0f5a + 2:
|
|
OpVssWsdCvtsd2ss(m, rde);
|
|
break;
|
|
case kOpCvt0f5a + 3:
|
|
OpVsdWssCvtss2sd(m, rde);
|
|
break;
|
|
case kOpCvt0f5b + 0:
|
|
OpVpsWdqCvtdq2ps(m, rde);
|
|
break;
|
|
case kOpCvt0f5b + 1:
|
|
OpVdqWpsCvtps2dq(m, rde);
|
|
break;
|
|
case kOpCvt0f5b + 3:
|
|
OpVdqWpsCvttps2dq(m, rde);
|
|
break;
|
|
case kOpCvt0fE6 + 1:
|
|
OpVdqWpdCvtpd2dq(m, rde);
|
|
break;
|
|
case kOpCvt0fE6 + 2:
|
|
OpVdqWpdCvttpd2dq(m, rde);
|
|
break;
|
|
case kOpCvt0fE6 + 3:
|
|
OpVpdWdqCvtdq2pd(m, rde);
|
|
break;
|
|
default:
|
|
OpUd(m, rde);
|
|
}
|
|
}
|
|
|
|
void OpCvt0f2a(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvt0f2a);
|
|
}
|
|
|
|
void OpCvtt0f2c(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvtt0f2c);
|
|
}
|
|
|
|
void OpCvt0f2d(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvt0f2d);
|
|
}
|
|
|
|
void OpCvt0f5a(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvt0f5a);
|
|
}
|
|
|
|
void OpCvt0f5b(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvt0f5b);
|
|
}
|
|
|
|
void OpCvt0fE6(struct Machine *m, uint32_t rde) {
|
|
OpCvt(m, rde, kOpCvt0fE6);
|
|
}
|