cosmopolitan/tool/build/lib/cvt.c
Justine Tunney 416fd86676 Make improvements
- Emulator can now test the αcτµαlly pδrταblε εxεcµταblε bootloader

- Whipped up a webserver named redbean. It services 150k requests per
  second on a single core. Bundling assets inside zip enables extremely
  fast serving for two reasons. The first is that zip central directory
  lookups go faster than stat() system calls. The second is that both
  zip and gzip content-encoding use DEFLATE, therefore, compressed
  responses can be served via the sendfile() system call which does an
  in-kernel copy directly from the zip executable structure. Also note
  that red bean zip executables can be deployed easily to all platforms,
  since these native executables work on Linux, Mac, BSD, and Windows.

- Address sanitizer now works very well
2020-09-14 00:02:34 -07:00

390 lines
11 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney │
│ │
│ This program is free software; you can redistribute it and/or modify │
│ it under the terms of the GNU General Public License as published by │
│ the Free Software Foundation; version 2 of the License. │
│ │
│ This program is distributed in the hope that it will be useful, but │
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
│ General Public License for more details. │
│ │
│ You should have received a copy of the GNU General Public License │
│ along with this program; if not, write to the Free Software │
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
│ 02110-1301 USA │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/macros.h"
#include "libc/math.h"
#include "tool/build/lib/cvt.h"
#include "tool/build/lib/endian.h"
#include "tool/build/lib/machine.h"
#include "tool/build/lib/memory.h"
#include "tool/build/lib/modrm.h"
#include "tool/build/lib/throw.h"
/* Base offsets into the OpCvt() dispatch index, one per opcode group.
   Every base is a multiple of four, which leaves the two low bits free
   for the value OpCvt() ORs in from Rep(rde) and Osz(rde). */
enum {
  kOpCvt0f2a = 0,
  kOpCvtt0f2c = 4,
  kOpCvt0f2d = 8,
  kOpCvt0f5a = 12,
  kOpCvt0f5b = 16,
  kOpCvt0fE6 = 20,
};
/**
 * Rounds x to an integral double as selected by m->sse.rc.
 *
 * Mode 0 uses rint(), 1 uses floor(), 2 uses ceil() and 3 uses trunc().
 * Any other mode value is marked unreachable.
 *
 * @param m is the machine whose sse.rc field picks the rounding function
 * @param x is the value to round
 * @return x rounded to an integral value
 */
static double SseRoundDouble(struct Machine *m, double x) {
  if (m->sse.rc == 0) return rint(x);
  if (m->sse.rc == 1) return floor(x);
  if (m->sse.rc == 2) return ceil(x);
  if (m->sse.rc == 3) return trunc(x);
  unreachable;
}
/**
 * Converts a scalar float to a signed integer with truncation.
 *
 * Four bytes are read from the ModRM XMM operand, converted with C's
 * implicit float-to-integer conversion (which discards the fraction),
 * and stored with Write64() into the register chosen by RegRexrReg().
 * Unless Rexw(rde) is set only the low 32 bits are kept, so the upper
 * half of the stored value is zero.
 */
static void OpGdqpWssCvttss2si(struct Machine *m, uint32_t rde) {
  float src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead4(m, rde), sizeof(src));
  result = src;
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
/**
 * Converts a scalar double to a signed integer with truncation.
 *
 * Eight bytes are read from the ModRM XMM operand, converted with C's
 * implicit double-to-integer conversion (which discards the fraction),
 * and stored with Write64() into the register chosen by RegRexrReg().
 * Unless Rexw(rde) is set only the low 32 bits are kept, so the upper
 * half of the stored value is zero.
 */
static void OpGdqpWsdCvttsd2si(struct Machine *m, uint32_t rde) {
  double src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  result = src;
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
/**
 * Converts a scalar float to a signed integer using the rounding mode
 * in m->sse.rc.
 *
 * Four bytes are read from the ModRM XMM operand, rounded, and stored
 * with Write64() into the register chosen by RegRexrReg(). Unless
 * Rexw(rde) is set only the low 32 bits are kept, so the upper half of
 * the stored value is zero.
 */
static void OpGdqpWssCvtss2si(struct Machine *m, uint32_t rde) {
  float f;
  int64_t n;
  memcpy(&f, GetModrmRegisterXmmPointerRead4(m, rde), 4);
  /* Honor m->sse.rc like the double-precision and packed variants do,
     rather than always using rintf(). Widening float to double is
     exact, so rounding in double yields the same integer. */
  n = SseRoundDouble(m, f);
  if (!Rexw(rde)) n &= 0xffffffff;
  Write64(RegRexrReg(m, rde), n);
}
/**
 * Converts a scalar double to a signed integer using the rounding mode
 * in m->sse.rc.
 *
 * Eight bytes are read from the ModRM XMM operand, rounded through
 * SseRoundDouble(), and stored with Write64() into the register chosen
 * by RegRexrReg(). Unless Rexw(rde) is set only the low 32 bits are
 * kept, so the upper half of the stored value is zero.
 */
static void OpGdqpWsdCvtsd2si(struct Machine *m, uint32_t rde) {
  double src;
  int64_t result;
  memcpy(&src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  result = SseRoundDouble(m, src);
  if (!Rexw(rde)) {
    result &= 0xffffffff;
  }
  Write64(RegRexrReg(m, rde), result);
}
/**
 * Converts a signed integer operand to a scalar float.
 *
 * The source is read as a signed 64-bit integer when Rexw(rde) is set
 * and as a sign-extended 32-bit integer otherwise. Only the low four
 * bytes of the destination XMM register are written.
 */
static void OpVssEdqpCvtsi2ss(struct Machine *m, uint32_t rde) {
  float f;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  f = n;
  memcpy(XmmRexrReg(m, rde), &f, 4);
}
/**
 * Converts a signed integer operand to a scalar double.
 *
 * The source is read as a signed 64-bit integer when Rexw(rde) is set
 * and as a sign-extended 32-bit integer otherwise. Only the low eight
 * bytes of the destination XMM register are written.
 */
static void OpVsdEdqpCvtsi2sd(struct Machine *m, uint32_t rde) {
  double d;
  int64_t n;
  if (Rexw(rde)) {
    n = (int64_t)Read64(GetModrmRegisterWordPointerRead8(m, rde));
  } else {
    n = (int32_t)Read32(GetModrmRegisterWordPointerRead4(m, rde));
  }
  d = n;
  memcpy(XmmRexrReg(m, rde), &d, 8);
}
/**
 * Converts two packed signed 32-bit integers to two packed floats.
 *
 * The integers come from the eight-byte ModRM MM operand. The results
 * overwrite the low eight bytes of the destination XMM register.
 */
static void OpVpsQpiCvtpi2ps(struct Machine *m, uint32_t rde) {
  uint8_t *src;
  int32_t lo, hi;
  float out[2];
  src = GetModrmRegisterMmPointerRead8(m, rde);
  lo = Read32(src + 0);
  hi = Read32(src + 4);
  out[0] = lo;
  out[1] = hi;
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed signed 32-bit integers to two packed doubles.
 *
 * The integers come from the eight-byte ModRM MM operand. The results
 * fill all sixteen bytes of the destination XMM register.
 */
static void OpVpdQpiCvtpi2pd(struct Machine *m, uint32_t rde) {
  uint8_t *src;
  int32_t lo, hi;
  double out[2];
  src = GetModrmRegisterMmPointerRead8(m, rde);
  lo = Read32(src + 0);
  hi = Read32(src + 4);
  out[0] = lo;
  out[1] = hi;
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed floats to two packed signed 32-bit integers using
 * the rounding mode in m->sse.rc.
 *
 * Mode 0 uses rintf(), 1 uses floorf(), 2 uses ceilf() and 3 uses
 * truncf(). The source is the low eight bytes of the ModRM XMM operand
 * and the results are written to the MM register given by MmReg().
 */
static void OpPpiWpsqCvtps2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  int32_t out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    switch (m->sse.rc) {
      case 0:
        out[lane] = rintf(src[lane]);
        break;
      case 1:
        out[lane] = floorf(src[lane]);
        break;
      case 2:
        out[lane] = ceilf(src[lane]);
        break;
      case 3:
        out[lane] = truncf(src[lane]);
        break;
      default:
        unreachable;
    }
  }
  Write32(MmReg(m, rde) + 0, out[0]);
  Write32(MmReg(m, rde) + 4, out[1]);
}
/**
 * Converts two packed floats to two packed signed 32-bit integers with
 * truncation.
 *
 * The source is the low eight bytes of the ModRM XMM operand. Each lane
 * is converted with C's implicit float-to-integer conversion and then
 * written to the MM register given by MmReg().
 */
static void OpPpiWpsqCvttps2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[2];
  int32_t out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = src[lane];
  }
  Write32(MmReg(m, rde) + 0, out[0]);
  Write32(MmReg(m, rde) + 4, out[1]);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers
 * using the rounding mode in m->sse.rc.
 *
 * The source is the sixteen-byte ModRM XMM operand. Each lane is
 * rounded with SseRoundDouble() and then written to the MM register
 * given by MmReg().
 */
static void OpPpiWpdCvtpd2pi(struct Machine *m, uint32_t rde) {
  double src[2];
  int32_t lo, hi;
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  lo = SseRoundDouble(m, src[0]);
  hi = SseRoundDouble(m, src[1]);
  Write32(MmReg(m, rde) + 0, lo);
  Write32(MmReg(m, rde) + 4, hi);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers with
 * truncation.
 *
 * The source is the sixteen-byte ModRM XMM operand. Each lane is
 * converted with C's implicit double-to-integer conversion and then
 * written to the MM register given by MmReg().
 */
static void OpPpiWpdCvttpd2pi(struct Machine *m, uint32_t rde) {
  unsigned lane;
  double src[2];
  int32_t out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 2; ++lane) {
    out[lane] = src[lane];
  }
  Write32(MmReg(m, rde) + 0, out[0]);
  Write32(MmReg(m, rde) + 4, out[1]);
}
/**
 * Widens two packed floats to two packed doubles.
 *
 * The source is the low eight bytes of the ModRM XMM operand. The
 * results fill all sixteen bytes of the destination XMM register.
 */
static void OpVpdWpsCvtps2pd(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float narrow[2];
  double wide[2];
  memcpy(narrow, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(narrow));
  for (lane = 0; lane < 2; ++lane) {
    wide[lane] = narrow[lane];
  }
  memcpy(XmmRexrReg(m, rde), wide, sizeof(wide));
}
/**
 * Narrows two packed doubles to two packed floats.
 *
 * The source is the sixteen-byte ModRM XMM operand. The two results go
 * in the low eight bytes of the destination XMM register and the high
 * eight bytes are cleared, as the instruction reference specifies for
 * CVTPD2PS (DEST[127:64] = 0).
 */
static void OpVpsWpdCvtpd2ps(struct Machine *m, uint32_t rde) {
  float f[4];
  double d[2];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  f[0] = d[0];
  f[1] = d[1];
  /* +0.0f is all zero bits, so these lanes clear the upper quadword */
  f[2] = 0;
  f[3] = 0;
  memcpy(XmmRexrReg(m, rde), f, 16);
}
/**
 * Narrows a scalar double to a scalar float.
 *
 * Eight bytes are read from the ModRM XMM operand. Only the low four
 * bytes of the destination XMM register are written.
 */
static void OpVssWsdCvtsd2ss(struct Machine *m, uint32_t rde) {
  double wide;
  float narrow;
  memcpy(&wide, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(wide));
  narrow = wide;
  memcpy(XmmRexrReg(m, rde), &narrow, sizeof(narrow));
}
/**
 * Widens a scalar float to a scalar double.
 *
 * Four bytes are read from the ModRM XMM operand. Only the low eight
 * bytes of the destination XMM register are written.
 */
static void OpVsdWssCvtss2sd(struct Machine *m, uint32_t rde) {
  float narrow;
  double wide;
  memcpy(&narrow, GetModrmRegisterXmmPointerRead4(m, rde), sizeof(narrow));
  wide = narrow;
  memcpy(XmmRexrReg(m, rde), &wide, sizeof(wide));
}
/**
 * Converts four packed signed 32-bit integers to four packed floats.
 *
 * The source is the sixteen-byte ModRM XMM operand. The results fill
 * all sixteen bytes of the destination XMM register.
 */
static void OpVpsWdqCvtdq2ps(struct Machine *m, uint32_t rde) {
  unsigned lane;
  int32_t src[4];
  float out[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    out[lane] = src[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed signed 32-bit integers to two packed doubles.
 *
 * The source is the low eight bytes of the ModRM XMM operand. The
 * results fill all sixteen bytes of the destination XMM register.
 */
static void OpVpdWdqCvtdq2pd(struct Machine *m, uint32_t rde) {
  int32_t src[2];
  double out[2];
  memcpy(src, GetModrmRegisterXmmPointerRead8(m, rde), sizeof(src));
  out[0] = src[0];
  out[1] = src[1];
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts four packed floats to four packed signed 32-bit integers
 * with truncation.
 *
 * The source is the sixteen-byte ModRM XMM operand. Each lane is
 * converted with C's implicit float-to-integer conversion. The results
 * fill all sixteen bytes of the destination XMM register.
 */
static void OpVdqWpsCvttps2dq(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[4];
  int32_t out[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    out[lane] = src[lane];
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts four packed floats to four packed signed 32-bit integers
 * using the rounding mode in m->sse.rc.
 *
 * Mode 0 uses rintf(), 1 uses floorf(), 2 uses ceilf() and 3 uses
 * truncf(). The source is the sixteen-byte ModRM XMM operand and the
 * results fill all sixteen bytes of the destination XMM register.
 */
static void OpVdqWpsCvtps2dq(struct Machine *m, uint32_t rde) {
  unsigned lane;
  float src[4];
  int32_t out[4];
  memcpy(src, GetModrmRegisterXmmPointerRead16(m, rde), sizeof(src));
  for (lane = 0; lane < 4; ++lane) {
    switch (m->sse.rc) {
      case 0:
        out[lane] = rintf(src[lane]);
        break;
      case 1:
        out[lane] = floorf(src[lane]);
        break;
      case 2:
        out[lane] = ceilf(src[lane]);
        break;
      case 3:
        out[lane] = truncf(src[lane]);
        break;
      default:
        unreachable;
    }
  }
  memcpy(XmmRexrReg(m, rde), out, sizeof(out));
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers with
 * truncation.
 *
 * The source is the sixteen-byte ModRM XMM operand. The two results go
 * in the low eight bytes of the destination XMM register and the high
 * eight bytes are cleared, as the instruction reference specifies for
 * CVTTPD2DQ (DEST[127:64] = 0).
 */
static void OpVdqWpdCvttpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = d[i];
  /* upper quadword of the destination must read back as zero */
  n[2] = 0;
  n[3] = 0;
  memcpy(XmmRexrReg(m, rde), n, 16);
}
/**
 * Converts two packed doubles to two packed signed 32-bit integers
 * using the rounding mode in m->sse.rc.
 *
 * The source is the sixteen-byte ModRM XMM operand and each lane is
 * rounded with SseRoundDouble(). The two results go in the low eight
 * bytes of the destination XMM register and the high eight bytes are
 * cleared, as the instruction reference specifies for CVTPD2DQ
 * (DEST[127:64] = 0).
 */
static void OpVdqWpdCvtpd2dq(struct Machine *m, uint32_t rde) {
  unsigned i;
  double d[2];
  int32_t n[4];
  memcpy(d, GetModrmRegisterXmmPointerRead16(m, rde), 16);
  for (i = 0; i < 2; ++i) n[i] = SseRoundDouble(m, d[i]);
  /* upper quadword of the destination must read back as zero */
  n[2] = 0;
  n[3] = 0;
  memcpy(XmmRexrReg(m, rde), n, 16);
}
/**
 * Dispatches a conversion instruction to its handler.
 *
 * The handler index is op | Rep(rde) | Osz(rde), where op is one of the
 * kOpCvt* bases (multiples of four) and the low two bits come from the
 * instruction's prefix state. Combinations without a handler raise an
 * undefined-opcode fault through OpUd().
 *
 * @param m is the machine state handed to the handler
 * @param rde is the decoded instruction word handed to the handler
 * @param op is the kOpCvt* base of the opcode being executed
 */
static void OpCvt(struct Machine *m, uint32_t rde, unsigned long op) {
  switch (op | Rep(rde) | Osz(rde)) {
    case kOpCvt0f2a + 0:
      OpVpsQpiCvtpi2ps(m, rde);
      break;
    case kOpCvt0f2a + 1:
      OpVpdQpiCvtpi2pd(m, rde);
      break;
    case kOpCvt0f2a + 2:
      OpVsdEdqpCvtsi2sd(m, rde);
      break;
    case kOpCvt0f2a + 3:
      OpVssEdqpCvtsi2ss(m, rde);
      break;
    case kOpCvtt0f2c + 0:
      OpPpiWpsqCvttps2pi(m, rde);
      break;
    case kOpCvtt0f2c + 1:
      OpPpiWpdCvttpd2pi(m, rde);
      break;
    case kOpCvtt0f2c + 2:
      OpGdqpWsdCvttsd2si(m, rde);
      break;
    case kOpCvtt0f2c + 3:
      OpGdqpWssCvttss2si(m, rde);
      break;
    case kOpCvt0f2d + 0:
      OpPpiWpsqCvtps2pi(m, rde);
      break;
    case kOpCvt0f2d + 1:
      OpPpiWpdCvtpd2pi(m, rde);
      break;
    case kOpCvt0f2d + 2:
      OpGdqpWsdCvtsd2si(m, rde);
      break;
    case kOpCvt0f2d + 3:
      OpGdqpWssCvtss2si(m, rde);
      break;
    case kOpCvt0f5a + 0:
      OpVpdWpsCvtps2pd(m, rde);
      break;
    case kOpCvt0f5a + 1:
      OpVpsWpdCvtpd2ps(m, rde);
      break;
    case kOpCvt0f5a + 2:
      OpVssWsdCvtsd2ss(m, rde);
      break;
    case kOpCvt0f5a + 3:
      OpVsdWssCvtss2sd(m, rde);
      break;
    case kOpCvt0f5b + 0:
      OpVpsWdqCvtdq2ps(m, rde);
      break;
    case kOpCvt0f5b + 1:
      OpVdqWpsCvtps2dq(m, rde);
      break;
    case kOpCvt0f5b + 3:
      OpVdqWpsCvttps2dq(m, rde);
      break;
    /* The other groups in this table use +1 for the 66 prefix, +2 for
       F2 and +3 for F3. In the instruction reference 66 0F E6 is the
       truncating CVTTPD2DQ and F2 0F E6 is the rounding CVTPD2DQ.
       NOTE(review): confirm against the Rep()/Osz() definitions. */
    case kOpCvt0fE6 + 1:
      OpVdqWpdCvttpd2dq(m, rde);
      break;
    case kOpCvt0fE6 + 2:
      OpVdqWpdCvtpd2dq(m, rde);
      break;
    case kOpCvt0fE6 + 3:
      OpVpdWdqCvtdq2pd(m, rde);
      break;
    default:
      OpUd(m, rde);
  }
}
/**
 * Public entry point for the kOpCvt0f2a group, which OpCvt() routes to
 * the cvtpi2ps, cvtpi2pd, cvtsi2sd and cvtsi2ss handlers.
 */
void OpCvt0f2a(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f2a);
}
/**
 * Public entry point for the kOpCvtt0f2c group, which OpCvt() routes to
 * the truncating cvttps2pi, cvttpd2pi, cvttsd2si and cvttss2si handlers.
 */
void OpCvtt0f2c(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvtt0f2c);
}
/**
 * Public entry point for the kOpCvt0f2d group, which OpCvt() routes to
 * the cvtps2pi, cvtpd2pi, cvtsd2si and cvtss2si handlers.
 */
void OpCvt0f2d(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f2d);
}
/**
 * Public entry point for the kOpCvt0f5a group, which OpCvt() routes to
 * the cvtps2pd, cvtpd2ps, cvtsd2ss and cvtss2sd handlers.
 */
void OpCvt0f5a(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f5a);
}
/**
 * Public entry point for the kOpCvt0f5b group, which OpCvt() routes to
 * the cvtdq2ps, cvtps2dq and cvttps2dq handlers. The remaining index in
 * this group has no handler and reaches OpUd().
 */
void OpCvt0f5b(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0f5b);
}
/**
 * Public entry point for the kOpCvt0fE6 group, which OpCvt() routes to
 * the cvtpd2dq, cvttpd2dq and cvtdq2pd handlers. The remaining index in
 * this group has no handler and reaches OpUd().
 */
void OpCvt0fE6(struct Machine *m, uint32_t rde) {
OpCvt(m, rde, kOpCvt0fE6);
}