/* Mirror of https://github.com/jart/cosmopolitan.git
   (synced 2025-01-31 19:43:32 +00:00; 1575 lines, 44 KiB, C) */
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/macros.internal.h"
|
|
#include "libc/str/str.h"
|
|
#include "tool/build/lib/endian.h"
|
|
#include "tool/build/lib/machine.h"
|
|
#include "tool/build/lib/modrm.h"
|
|
#include "tool/build/lib/throw.h"
|
|
|
|
/**
 * Bitwise OR across eight byte lanes: x[i] |= y[i].
 *
 * @param x left operand and destination (8 bytes, updated in place)
 * @param y right operand (8 bytes, read only)
 */
static void MmxPor(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] |= y[i];
  }
}
|
|
|
|
/**
 * Bitwise XOR across eight byte lanes: x[i] ^= y[i].
 *
 * @param x left operand and destination (8 bytes, updated in place)
 * @param y right operand (8 bytes, read only)
 */
static void MmxPxor(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] ^= y[i];
  }
}
|
|
|
|
/**
 * Wrapping byte subtraction across eight lanes: x[i] -= y[i] (mod 256).
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] -= y[i];
  }
}
|
|
|
|
/**
 * Wrapping byte addition across eight lanes: x[i] += y[i] (mod 256).
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] += y[i];
  }
}
|
|
|
|
/**
 * Bitwise AND across eight byte lanes: x[i] &= y[i].
 *
 * @param x left operand and destination (8 bytes, updated in place)
 * @param y right operand (8 bytes, read only)
 */
static void MmxPand(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] &= y[i];
  }
}
|
|
|
|
/**
 * AND-NOT across eight byte lanes: x[i] = ~x[i] & y[i].
 *
 * Note that it is the destination operand that gets inverted.
 *
 * @param x inverted operand and destination (8 bytes, updated in place)
 * @param y mask operand (8 bytes, read only)
 */
static void MmxPandn(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = ~x[i] & y[i];
  }
}
|
|
|
|
/**
 * Rounded-up unsigned byte average: x[i] = (x[i] + y[i] + 1) >> 1.
 *
 * The sum is formed in int after integer promotion, so it cannot wrap.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPavgb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = (x[i] + y[i] + 1) >> 1;
  }
}
|
|
|
|
/**
 * Absolute value of each signed byte of y, stored into x.
 *
 * The prior contents of x are not read. ABS operates on the promoted int,
 * so -128 yields 128, which is stored as 0x80.
 *
 * @param x destination (8 bytes, overwritten)
 * @param y source bytes interpreted as int8_t (read only)
 */
static void MmxPabsb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = ABS((int8_t)y[i]);
  }
}
|
|
|
|
/**
 * Unsigned byte minimum across eight lanes: x[i] = MIN(x[i], y[i]).
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPminub(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MIN(x[i], y[i]);
  }
}
|
|
|
|
/**
 * Unsigned byte maximum across eight lanes: x[i] = MAX(x[i], y[i]).
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmaxub(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MAX(x[i], y[i]);
  }
}
|
|
|
|
/**
 * Unsigned saturating byte addition: x[i] = MIN(255, x[i] + y[i]).
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddusb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MIN(255, x[i] + y[i]);
  }
}
|
|
|
|
/**
 * Unsigned saturating byte subtraction: x[i] - y[i] clamped to [0, 255].
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubusb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MIN(255, MAX(0, x[i] - y[i]));
  }
}
|
|
|
|
/**
 * Byte equality mask: x[i] becomes 0xFF where x[i] == y[i], else 0x00.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpeqb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = -(x[i] == y[i]); /* -1 truncates to 0xFF */
  }
}
|
|
|
|
/**
 * Signed byte greater-than mask: x[i] becomes 0xFF where
 * (int8_t)x[i] > (int8_t)y[i], else 0x00.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpgtb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = -((int8_t)x[i] > (int8_t)y[i]); /* -1 truncates to 0xFF */
  }
}
|
|
|
|
/**
 * Signed saturating byte subtraction: (int8_t)x[i] - (int8_t)y[i]
 * clamped to [-128, 127].
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubsb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MAX(-128, MIN(127, (int8_t)x[i] - (int8_t)y[i]));
  }
}
|
|
|
|
/**
 * Signed saturating byte addition: (int8_t)x[i] + (int8_t)y[i]
 * clamped to [-128, 127].
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddsb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = MAX(-128, MIN(127, (int8_t)x[i] + (int8_t)y[i]));
  }
}
|
|
|
|
/**
 * Rounded, scaled signed 16-bit multiply over four lanes.
 *
 * Each lane becomes (((a * b) >> 14) + 1) >> 1, where a and b are the
 * signed 16-bit lanes of x and y read through Read16.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmulhrsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  int16_t a, b;
  for (i = 0; i < 4; ++i) {
    a = Read16(x + i * 2);
    b = Read16(y + i * 2);
    Write16(x + i * 2, (((a * b) >> 14) + 1) >> 1);
  }
}
|
|
|
|
/**
 * Multiplies unsigned bytes of x by signed bytes of y and adds adjacent
 * pairs, saturating each sum to [-32768, 32767].
 *
 * The four results are written as 16-bit lanes over x.
 *
 * @param x unsigned byte operand and destination (8 bytes, updated in place)
 * @param y bytes interpreted as int8_t (8 bytes, read only)
 */
static void MmxPmaddubsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            MAX(-32768, MIN(32767, (x[i * 2 + 0] * (int8_t)y[i * 2 + 0] +
                                    x[i * 2 + 1] * (int8_t)y[i * 2 + 1]))));
  }
}
|
|
|
|
/**
 * Right-shifts each of four signed 16-bit lanes of x by k bits.
 *
 * Counts above 15 are clamped to 15, so the lane fills with its sign bit.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsraw(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k > 15) k = 15;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, (int16_t)Read16(x + i * 2) >> k);
  }
}
|
|
|
|
/**
 * Right-shifts each of two signed 32-bit lanes of x by k bits.
 *
 * Counts above 31 are clamped to 31.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsrad(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k > 31) k = 31;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, (int32_t)Read32(x + i * 4) >> k);
  }
}
|
|
|
|
/**
 * Logical right shift of four 16-bit lanes of x by k bits.
 *
 * A count of 16 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsrlw(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k < 16) {
    for (i = 0; i < 4; ++i) {
      Write16(x + i * 2, Read16(x + i * 2) >> k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of four 16-bit lanes of x by k bits.
 *
 * A count above 15 clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsllw(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k <= 15) {
    for (i = 0; i < 4; ++i) {
      Write16(x + i * 2, Read16(x + i * 2) << k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Logical right shift of two 32-bit lanes of x by k bits.
 *
 * A count above 31 clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsrld(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k <= 31) {
    for (i = 0; i < 2; ++i) {
      Write32(x + i * 4, Read32(x + i * 4) >> k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of two 32-bit lanes of x by k bits.
 *
 * A count above 31 clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPslld(uint8_t x[8], unsigned k) {
  unsigned i;
  if (k <= 31) {
    for (i = 0; i < 2; ++i) {
      Write32(x + i * 4, Read32(x + i * 4) << k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Logical right shift of the single 64-bit lane of x by k bits.
 *
 * A count above 63 clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsrlq(uint8_t x[8], unsigned k) {
  if (k <= 63) {
    Write64(x, Read64(x) >> k);
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of the single 64-bit lane of x by k bits.
 *
 * A count above 63 clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift count in bits
 */
static void MmxPsllq(uint8_t x[8], unsigned k) {
  if (k <= 63) {
    Write64(x, Read64(x) << k);
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Moves the 8 bytes of x toward higher indices by k positions,
 * zero-filling the k lowest indices.
 *
 * Counts above 8 are clamped to 8, which clears x entirely.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift distance in bytes
 */
static void MmxPslldq(uint8_t x[8], unsigned k) {
  unsigned i;
  uint8_t t[8];
  if (k > 8) k = 8;
  for (i = 0; i < k; ++i) t[i] = 0;
  for (i = 0; i < 8 - k; ++i) t[k + i] = x[i];
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Moves the 8 bytes of x toward lower indices by k positions,
 * zero-filling the k highest indices.
 *
 * Counts above 8 are clamped to 8, which clears x entirely.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param k shift distance in bytes
 */
static void MmxPsrldq(uint8_t x[8], unsigned k) {
  uint8_t t[8];
  if (k > 8) k = 8;
  memcpy(t, x + k, 8 - k);
  memset(t + (8 - k), 0, k);
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Extracts 8 bytes from the concatenation [y | x | zeros] at byte
 * offset MIN(k, 16) and stores them in x.
 *
 * The 24-byte scratch buffer holds y at offset 0, x at offset 8 and
 * zeros at offset 16, so offsets of 16 or more yield all zeros.
 *
 * @param x high part of the concatenation and destination (8 bytes)
 * @param y low part of the concatenation (8 bytes, read only)
 * @param k byte offset into the concatenation
 */
static void MmxPalignr(uint8_t x[8], const uint8_t y[8], unsigned k) {
  uint8_t t[24];
  memcpy(t, y, 8);
  memcpy(t + 8, x, 8);
  memset(t + 16, 0, 8);
  memcpy(x, t + MIN(k, 16), 8);
}
|
|
|
|
/**
 * Subtracts the four 16-bit lanes of y from those of x.
 *
 * Lanes are accessed through Read16/Write16; the difference is handed to
 * Write16 as computed, without clamping.
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, Read16(x + i * 2) - Read16(y + i * 2));
  }
}
|
|
|
|
/**
 * Adds the four 16-bit lanes of y to those of x.
 *
 * Lanes are accessed through Read16/Write16; the sum is handed to
 * Write16 as computed, without clamping.
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, Read16(x + i * 2) + Read16(y + i * 2));
  }
}
|
|
|
|
/**
 * Subtracts the two 32-bit lanes of y from those of x, without clamping.
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubd(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, Read32(x + i * 4) - Read32(y + i * 4));
  }
}
|
|
|
|
/**
 * Adds the two 32-bit lanes of y to those of x, without clamping.
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddd(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, Read32(x + i * 4) + Read32(y + i * 4));
  }
}
|
|
|
|
/**
 * Adds the 64-bit value of y to the 64-bit value of x.
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddq(uint8_t x[8], const uint8_t y[8]) {
  Write64(x, Read64(x) + Read64(y));
}
|
|
|
|
/**
 * Subtracts the 64-bit value of y from the 64-bit value of x.
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubq(uint8_t x[8], const uint8_t y[8]) {
  Write64(x, Read64(x) - Read64(y));
}
|
|
|
|
/**
 * Signed saturating addition over four 16-bit lanes; each sum is
 * clamped to [-32768, 32767].
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + i * 2) +
                                               (int16_t)Read16(y + i * 2)))));
  }
}
|
|
|
|
/**
 * Signed saturating subtraction over four 16-bit lanes; each difference
 * is clamped to [-32768, 32767].
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + i * 2) -
                                               (int16_t)Read16(y + i * 2)))));
  }
}
|
|
|
|
/**
 * Unsigned saturating addition over four 16-bit lanes; each sum is
 * capped at 65535.
 *
 * @param x first addend and destination (8 bytes, updated in place)
 * @param y second addend (8 bytes, read only)
 */
static void MmxPaddusw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, MIN(65535, Read16(x + i * 2) + Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Unsigned saturating subtraction over four 16-bit lanes; each
 * difference is clamped to [0, 65535].
 *
 * @param x minuend and destination (8 bytes, updated in place)
 * @param y subtrahend (8 bytes, read only)
 */
static void MmxPsubusw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            MIN(65535, MAX(0, Read16(x + i * 2) - Read16(y + i * 2))));
  }
}
|
|
|
|
/**
 * Signed minimum over four 16-bit lanes.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPminsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            MIN((int16_t)Read16(x + i * 2), (int16_t)Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Signed maximum over four 16-bit lanes.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmaxsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            MAX((int16_t)Read16(x + i * 2), (int16_t)Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Packs signed 16-bit lanes into unsigned bytes with saturation.
 *
 * The four lanes of x fill result bytes 0..3 and the four lanes of y
 * fill bytes 4..7; each value is clamped to [0, 255]. The result is
 * built in a scratch buffer so inputs stay intact until the end.
 *
 * @param x low-half source and destination (8 bytes)
 * @param y high-half source (8 bytes, read only)
 */
static void MmxPackuswb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint8_t t[8];
  for (i = 0; i < 4; ++i) {
    t[i + 0] = MIN(255, MAX(0, (int16_t)Read16(x + i * 2)));
  }
  for (i = 0; i < 4; ++i) {
    t[i + 4] = MIN(255, MAX(0, (int16_t)Read16(y + i * 2)));
  }
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Packs signed 16-bit lanes into signed bytes with saturation.
 *
 * The four lanes of x fill result bytes 0..3 and the four lanes of y
 * fill bytes 4..7; each value is clamped to [-128, 127].
 *
 * @param x low-half source and destination (8 bytes)
 * @param y high-half source (8 bytes, read only)
 */
static void MmxPacksswb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint8_t t[8];
  for (i = 0; i < 4; ++i) {
    t[i + 0] = MAX(-128, MIN(127, (int16_t)Read16(x + i * 2)));
  }
  for (i = 0; i < 4; ++i) {
    t[i + 4] = MAX(-128, MIN(127, (int16_t)Read16(y + i * 2)));
  }
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Packs signed 32-bit lanes into signed 16-bit lanes with saturation.
 *
 * The two lanes of x fill result words 0..1 and the two lanes of y fill
 * words 2..3; each value is clamped to [-32768, 32767].
 *
 * @param x low-half source and destination (8 bytes)
 * @param y high-half source (8 bytes, read only)
 */
static void MmxPackssdw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint8_t t[8];
  for (i = 0; i < 2; ++i) {
    Write16(t + i * 2 + 0, MAX(-32768, MIN(32767, (int32_t)Read32(x + i * 4))));
  }
  for (i = 0; i < 2; ++i) {
    Write16(t + i * 2 + 4, MAX(-32768, MIN(32767, (int32_t)Read32(y + i * 4))));
  }
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Signed greater-than mask over four 16-bit lanes.
 *
 * Each lane receives -(x > y): -1 where the signed lane of x exceeds
 * that of y, otherwise 0.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpgtw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            -((int16_t)Read16(x + i * 2) > (int16_t)Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Equality mask over four 16-bit lanes: each lane receives -(x == y),
 * i.e. -1 where the lanes match, otherwise 0.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpeqw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, -(Read16(x + i * 2) == Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Signed greater-than mask over two 32-bit lanes: each lane receives
 * -(x > y), i.e. -1 where the signed lane of x exceeds y, otherwise 0.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpgtd(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4,
            -((int32_t)Read32(x + i * 4) > (int32_t)Read32(y + i * 4)));
  }
}
|
|
|
|
/**
 * Equality mask over two 32-bit lanes: each lane receives -(x == y),
 * i.e. -1 where the lanes match, otherwise 0.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPcmpeqd(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, -(Read32(x + i * 4) == Read32(y + i * 4)));
  }
}
|
|
|
|
/**
 * Right-shifts four signed 16-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); values above 15 are clamped to 15.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsrawv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k > 15) k = 15;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, (int16_t)Read16(x + i * 2) >> k);
  }
}
|
|
|
|
/**
 * Right-shifts two signed 32-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); values above 31 are clamped to 31.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsradv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k > 31) k = 31;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, (int32_t)Read32(x + i * 4) >> k);
  }
}
|
|
|
|
/**
 * Logical right shift of four 16-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 16 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsrlwv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k < 16) {
    for (i = 0; i < 4; ++i) {
      Write16(x + i * 2, Read16(x + i * 2) >> k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of four 16-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 16 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsllwv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k < 16) {
    for (i = 0; i < 4; ++i) {
      Write16(x + i * 2, Read16(x + i * 2) << k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Logical right shift of two 32-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 32 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsrldv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k < 32) {
    for (i = 0; i < 2; ++i) {
      Write32(x + i * 4, Read32(x + i * 4) >> k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of two 32-bit lanes of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 32 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPslldv(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint64_t k;
  k = Read64(y);
  if (k < 32) {
    for (i = 0; i < 2; ++i) {
      Write32(x + i * 4, Read32(x + i * 4) << k);
    }
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Logical right shift of the 64-bit lane of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 64 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsrlqv(uint8_t x[8], const uint8_t y[8]) {
  uint64_t k;
  k = Read64(y);
  if (k < 64) {
    Write64(x, Read64(x) >> k);
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Left shift of the 64-bit lane of x by the 64-bit count in y.
 *
 * The count is Read64(y); a count of 64 or more clears all eight bytes.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y shift count as a 64-bit value (8 bytes, read only)
 */
static void MmxPsllqv(uint8_t x[8], const uint8_t y[8]) {
  uint64_t k;
  k = Read64(y);
  if (k < 64) {
    Write64(x, Read64(x) << k);
  } else {
    memset(x, 0, 8);
  }
}
|
|
|
|
/**
 * Rounded-up average over four 16-bit lanes: (x + y + 1) >> 1.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPavgw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, (Read16(x + i * 2) + Read16(y + i * 2) + 1) >> 1);
  }
}
|
|
|
|
/**
 * Sum of absolute differences over all eight unsigned bytes.
 *
 * For the 64-bit form of PSADBW, all eight |x[i] - y[i]| values are
 * summed into a single word placed in the low 16 bits of the
 * destination, and the remaining bits are zeroed. The maximum sum is
 * 8 * 255 = 2040, so writing it as a 64-bit value leaves the upper
 * bits zero. The previous code produced two independent 4-byte sums in
 * the low and high dwords, which matches neither the 64-bit nor the
 * 128-bit form of the instruction.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPsadbw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i, s;
  for (s = i = 0; i < 8; ++i) s += ABS(x[i] - y[i]);
  Write64(x, s);
}
|
|
|
|
/**
 * Multiplies signed 16-bit lanes pairwise and adds adjacent products
 * into two 32-bit lanes.
 *
 * Each product fits in int, but the sum of two products can reach 2^31
 * when all four inputs are -32768. The addition is therefore done in
 * uint32_t so it wraps (to 0x80000000) instead of invoking signed
 * overflow, which is undefined behaviour in C.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmaddwd(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4,
            ((uint32_t)((int16_t)Read16(x + i * 4 + 0) *
                        (int16_t)Read16(y + i * 4 + 0)) +
             (uint32_t)((int16_t)Read16(x + i * 4 + 2) *
                        (int16_t)Read16(y + i * 4 + 2))));
  }
}
|
|
|
|
/**
 * Unsigned 16-bit multiply keeping the high half, over four lanes.
 *
 * Each lane becomes (x * y) >> 16, with the product formed in uint32_t.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmulhuw(uint8_t x[8], const uint8_t y[8]) {
  uint32_t v;
  unsigned i;
  for (i = 0; i < 4; ++i) {
    v = Read16(x + i * 2);
    v *= Read16(y + i * 2);
    v >>= 16;
    Write16(x + i * 2, v);
  }
}
|
|
|
|
/**
 * Signed 16-bit multiply keeping the high half, over four lanes.
 *
 * Each lane becomes ((int16_t)x * (int16_t)y) >> 16.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmulhw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2,
            ((int16_t)Read16(x + i * 2) * (int16_t)Read16(y + i * 2)) >> 16);
  }
}
|
|
|
|
/**
 * Multiplies the low 32-bit lanes of x and y as unsigned values and
 * stores the full 64-bit product in x.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (only its first 4 bytes are read)
 */
static void MmxPmuludq(uint8_t x[8], const uint8_t y[8]) {
  Write64(x, (uint64_t)Read32(x) * Read32(y));
}
|
|
|
|
/**
 * Signed 16-bit multiply over four lanes; the product is handed to
 * Write16, which stores a 16-bit lane.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmullw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, (int16_t)Read16(x + i * 2) * (int16_t)Read16(y + i * 2));
  }
}
|
|
|
|
/**
 * 32-bit multiply over two lanes; the product of the Read32 values is
 * handed to Write32.
 *
 * @param x first operand and destination (8 bytes, updated in place)
 * @param y second operand (8 bytes, read only)
 */
static void MmxPmulld(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write32(x + i * 4, Read32(x + i * 4) * Read32(y + i * 4));
  }
}
|
|
|
|
/**
 * Byte shuffle of x controlled by y.
 *
 * For each lane i: if bit 7 of y[i] is set the result byte is 0,
 * otherwise it is x[y[i] & 7]. The result is built in a scratch buffer
 * so every lookup sees the original x.
 *
 * @param x table of source bytes and destination (8 bytes)
 * @param y per-lane selector (8 bytes, read only)
 */
static void MmxPshufb(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  uint8_t t[8];
  for (i = 0; i < 8; ++i) {
    t[i] = (y[i] & 128) ? 0 : x[y[i] & 7];
  }
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Applies the sign of each signed byte of y to the matching byte of x.
 *
 * Where y[i] is zero the lane is cleared, where it is negative the lane
 * is negated, and where it is positive the lane is left unchanged.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y sign source, interpreted as int8_t (8 bytes, read only)
 */
static void MmxPsignb(uint8_t x[8], const uint8_t y[8]) {
  int v;
  unsigned i;
  for (i = 0; i < 8; ++i) {
    v = (int8_t)y[i];
    if (!v) {
      x[i] = 0;
    } else if (v < 0) {
      x[i] = -(int8_t)x[i];
    }
  }
}
|
|
|
|
/**
 * Applies the sign of each signed 16-bit lane of y to the matching lane
 * of x: zero clears the lane, negative negates it, positive leaves it.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y sign source (8 bytes, read only)
 */
static void MmxPsignw(uint8_t x[8], const uint8_t y[8]) {
  int v;
  unsigned i;
  for (i = 0; i < 4; ++i) {
    v = (int16_t)Read16(y + i * 2);
    if (!v) {
      Write16(x + i * 2, 0);
    } else if (v < 0) {
      Write16(x + i * 2, -(int16_t)Read16(x + i * 2));
    }
  }
}
|
|
|
|
/**
 * Applies the sign of each signed 32-bit lane of y to the matching lane
 * of x: zero clears the lane, negative negates it, positive leaves it.
 *
 * @param x operand and destination (8 bytes, updated in place)
 * @param y sign source (8 bytes, read only)
 */
static void MmxPsignd(uint8_t x[8], const uint8_t y[8]) {
  int32_t v;
  unsigned i;
  for (i = 0; i < 2; ++i) {
    v = Read32(y + i * 4);
    if (!v) {
      Write32(x + i * 4, 0);
    } else if (v < 0) {
      Write32(x + i * 4, -Read32(x + i * 4));
    }
  }
}
|
|
|
|
/**
 * Absolute value of each signed 16-bit lane of y, stored into x.
 *
 * The prior contents of x are not read.
 *
 * @param x destination (8 bytes, overwritten)
 * @param y source lanes (8 bytes, read only)
 */
static void MmxPabsw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write16(x + i * 2, ABS((int16_t)Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Absolute value of each signed 32-bit lane of y, stored into x.
 *
 * Negation is done on the unsigned representation, so INT32_MIN does
 * not overflow. The prior contents of x are not read.
 *
 * @param x destination (8 bytes, overwritten)
 * @param y source lanes (8 bytes, read only)
 */
static void MmxPabsd(uint8_t x[8], const uint8_t y[8]) {
  int32_t v;
  unsigned i;
  for (i = 0; i < 2; ++i) {
    v = Read32(y + i * 4);
    Write32(x + i * 4, v >= 0 ? v : -(uint32_t)v);
  }
}
|
|
|
|
/**
 * Horizontal add of adjacent 16-bit lanes.
 *
 * Result words 0..1 are the pair sums from x and words 2..3 the pair
 * sums from y. A scratch buffer keeps the inputs intact until the end.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhaddw(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write16(t + 0 * 2, Read16(x + 0 * 2) + Read16(x + 1 * 2));
  Write16(t + 1 * 2, Read16(x + 2 * 2) + Read16(x + 3 * 2));
  Write16(t + 2 * 2, Read16(y + 0 * 2) + Read16(y + 1 * 2));
  Write16(t + 3 * 2, Read16(y + 2 * 2) + Read16(y + 3 * 2));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Horizontal subtract of adjacent 16-bit lanes (even minus odd).
 *
 * Result words 0..1 are the pair differences from x and words 2..3 the
 * pair differences from y.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhsubw(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write16(t + 0 * 2, Read16(x + 0 * 2) - Read16(x + 1 * 2));
  Write16(t + 1 * 2, Read16(x + 2 * 2) - Read16(x + 3 * 2));
  Write16(t + 2 * 2, Read16(y + 0 * 2) - Read16(y + 1 * 2));
  Write16(t + 3 * 2, Read16(y + 2 * 2) - Read16(y + 3 * 2));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Horizontal add of 32-bit lanes: result lane 0 is the sum of x's two
 * lanes and result lane 1 is the sum of y's two lanes.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhaddd(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write32(t + 0 * 4, Read32(x + 0 * 4) + Read32(x + 1 * 4));
  Write32(t + 1 * 4, Read32(y + 0 * 4) + Read32(y + 1 * 4));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Horizontal subtract of 32-bit lanes: result lane 0 is x lane 0 minus
 * x lane 1, result lane 1 is y lane 0 minus y lane 1.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhsubd(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write32(t + 0 * 4, Read32(x + 0 * 4) - Read32(x + 1 * 4));
  Write32(t + 1 * 4, Read32(y + 0 * 4) - Read32(y + 1 * 4));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Horizontal add of adjacent signed 16-bit lanes with saturation to
 * [-32768, 32767].
 *
 * Result words 0..1 come from x pairs and words 2..3 from y pairs.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhaddsw(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write16(t + 0 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + 0 * 2) +
                                             (int16_t)Read16(x + 1 * 2)))));
  Write16(t + 1 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + 2 * 2) +
                                             (int16_t)Read16(x + 3 * 2)))));
  Write16(t + 2 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(y + 0 * 2) +
                                             (int16_t)Read16(y + 1 * 2)))));
  Write16(t + 3 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(y + 2 * 2) +
                                             (int16_t)Read16(y + 3 * 2)))));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Horizontal subtract of adjacent signed 16-bit lanes (even minus odd)
 * with saturation to [-32768, 32767].
 *
 * Result words 0..1 come from x pairs and words 2..3 from y pairs. The
 * third result previously subtracted x's word 1 from y's word 0; it now
 * uses y's word 1, matching the other lanes and the 16-byte variant.
 *
 * @param x first source and destination (8 bytes)
 * @param y second source (8 bytes, read only)
 */
static void MmxPhsubsw(uint8_t x[8], const uint8_t y[8]) {
  uint8_t t[8];
  Write16(t + 0 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + 0 * 2) -
                                             (int16_t)Read16(x + 1 * 2)))));
  Write16(t + 1 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(x + 2 * 2) -
                                             (int16_t)Read16(x + 3 * 2)))));
  Write16(t + 2 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(y + 0 * 2) -
                                             (int16_t)Read16(y + 1 * 2)))));
  Write16(t + 3 * 2, MAX(-32768, MIN(32767, ((int16_t)Read16(y + 2 * 2) -
                                             (int16_t)Read16(y + 3 * 2)))));
  memcpy(x, t, 8);
}
|
|
|
|
/**
 * Interleaves the low four bytes of x and y:
 * x becomes {x0, y0, x1, y1, x2, y2, x3, y3}.
 *
 * Lanes are written from the highest index downward so each source
 * byte of x is read before it is overwritten.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpcklbw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 4; i-- > 0;) {
    x[i * 2 + 1] = y[i];
    x[i * 2 + 0] = x[i];
  }
}
|
|
|
|
/**
 * Interleaves the high four bytes of x and y:
 * x becomes {x4, y4, x5, y5, x6, y6, x7, y7}.
 *
 * Lanes are written from index 0 upward so each source byte of x is
 * read before it is overwritten.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpckhbw(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    x[i * 2 + 0] = x[4 + i];
    x[i * 2 + 1] = y[4 + i];
  }
}
|
|
|
|
/**
 * Interleaves the low two 2-byte groups of x and y:
 * x becomes {x0,x1, y0,y1, x2,x3, y2,y3}.
 *
 * Groups are written from the highest index downward so source bytes of
 * x are read before they are overwritten.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpcklwd(uint8_t x[8], const uint8_t y[8]) {
  unsigned j;
  for (j = 2; j-- > 0;) {
    x[j * 4 + 2] = y[j * 2 + 0];
    x[j * 4 + 3] = y[j * 2 + 1];
    x[j * 4 + 0] = x[j * 2 + 0];
    x[j * 4 + 1] = x[j * 2 + 1];
  }
}
|
|
|
|
/**
 * Keeps the low four bytes of x and copies the low four bytes of y into
 * the high half: x becomes {x0..x3, y0..y3}.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpckldq(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    x[4 + i] = y[i];
  }
}
|
|
|
|
/**
 * Interleaves the high two 2-byte groups of x and y:
 * x becomes {x4,x5, y4,y5, x6,x7, y6,y7}.
 *
 * Groups are written from index 0 upward so source bytes of x are read
 * before they are overwritten.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpckhwd(uint8_t x[8], const uint8_t y[8]) {
  unsigned j;
  for (j = 0; j < 2; ++j) {
    x[j * 4 + 0] = x[4 + j * 2 + 0];
    x[j * 4 + 1] = x[4 + j * 2 + 1];
    x[j * 4 + 2] = y[4 + j * 2 + 0];
    x[j * 4 + 3] = y[4 + j * 2 + 1];
  }
}
|
|
|
|
/**
 * Moves the high four bytes of x into its low half, then copies the
 * high four bytes of y into its high half: x becomes {x4..x7, y4..y7}.
 *
 * @param x first source and destination (8 bytes, updated in place)
 * @param y second source (8 bytes, read only)
 */
static void MmxPunpckhdq(uint8_t x[8], const uint8_t y[8]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    x[i] = x[4 + i];
  }
  for (i = 0; i < 4; ++i) {
    x[4 + i] = y[4 + i];
  }
}
|
|
|
|
/**
 * Intentional no-op: neither x nor y is read or written.
 *
 * NOTE(review): presumably a placeholder so the 8-byte kernel set
 * mirrors the 16-byte one -- confirm against the dispatch site.
 *
 * @param x unused
 * @param y unused
 */
static void MmxPunpcklqdq(uint8_t x[8], const uint8_t y[8]) {
  (void)x;
  (void)y;
}
|
|
|
|
/**
 * Intentional no-op: neither x nor y is read or written.
 *
 * NOTE(review): presumably a placeholder so the 8-byte kernel set
 * mirrors the 16-byte one -- confirm against the dispatch site.
 *
 * @param x unused
 * @param y unused
 */
static void MmxPunpckhqdq(uint8_t x[8], const uint8_t y[8]) {
  (void)x;
  (void)y;
}
|
|
|
|
/**
 * Wrapping byte subtraction across sixteen lanes: x[i] -= y[i].
 *
 * @param x minuend and destination (16 bytes, updated in place)
 * @param y subtrahend (16 bytes, read only)
 */
static void SsePsubb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] -= y[i];
  }
}
|
|
|
|
/**
 * Wrapping byte addition across sixteen lanes: x[i] += y[i].
 *
 * @param x first addend and destination (16 bytes, updated in place)
 * @param y second addend (16 bytes, read only)
 */
static void SsePaddb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] += y[i];
  }
}
|
|
|
|
/**
 * Bitwise OR across sixteen byte lanes: x[i] |= y[i].
 *
 * @param x left operand and destination (16 bytes, updated in place)
 * @param y right operand (16 bytes, read only)
 */
static void SsePor(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] |= y[i];
  }
}
|
|
|
|
/**
 * Bitwise XOR across sixteen byte lanes: x[i] ^= y[i].
 *
 * @param x left operand and destination (16 bytes, updated in place)
 * @param y right operand (16 bytes, read only)
 */
static void SsePxor(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] ^= y[i];
  }
}
|
|
|
|
/**
 * Bitwise AND across sixteen byte lanes: x[i] &= y[i].
 *
 * @param x left operand and destination (16 bytes, updated in place)
 * @param y right operand (16 bytes, read only)
 */
static void SsePand(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] &= y[i];
  }
}
|
|
|
|
/**
 * AND-NOT across sixteen byte lanes: x[i] = ~x[i] & y[i].
 *
 * Note that it is the destination operand that gets inverted.
 *
 * @param x inverted operand and destination (16 bytes, updated in place)
 * @param y mask operand (16 bytes, read only)
 */
static void SsePandn(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = ~x[i] & y[i];
  }
}
|
|
|
|
/**
 * Byte equality mask over sixteen lanes: x[i] becomes 0xFF where
 * x[i] == y[i], else 0x00.
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePcmpeqb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = -(x[i] == y[i]); /* -1 truncates to 0xFF */
  }
}
|
|
|
|
/**
 * Signed byte greater-than mask over sixteen lanes: x[i] becomes 0xFF
 * where (int8_t)x[i] > (int8_t)y[i], else 0x00.
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePcmpgtb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = -((int8_t)x[i] > (int8_t)y[i]); /* -1 truncates to 0xFF */
  }
}
|
|
|
|
/**
 * Rounded-up unsigned byte average over sixteen lanes:
 * x[i] = (x[i] + y[i] + 1) >> 1.
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePavgb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = (x[i] + y[i] + 1) >> 1;
  }
}
|
|
|
|
/**
 * Absolute value of each signed byte of y, stored into x (16 lanes).
 *
 * The prior contents of x are not read.
 *
 * @param x destination (16 bytes, overwritten)
 * @param y source bytes interpreted as int8_t (read only)
 */
static void SsePabsb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = ABS((int8_t)y[i]);
  }
}
|
|
|
|
/**
 * Unsigned byte minimum across sixteen lanes: x[i] = MIN(x[i], y[i]).
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePminub(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = MIN(x[i], y[i]);
  }
}
|
|
|
|
/**
 * Unsigned byte maximum across sixteen lanes: x[i] = MAX(x[i], y[i]).
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePmaxub(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 16; ++i) {
    x[i] = MAX(x[i], y[i]);
  }
}
|
|
|
|
/**
 * Moves the 16 bytes of x toward higher indices by k positions,
 * zero-filling the k lowest indices.
 *
 * Counts above 16 are clamped to 16, which clears x entirely.
 *
 * @param x operand and destination (16 bytes, updated in place)
 * @param k shift distance in bytes
 */
static void SsePslldq(uint8_t x[16], unsigned k) {
  unsigned i;
  uint8_t t[16];
  if (k > 16) k = 16;
  for (i = 0; i < k; ++i) t[i] = 0;
  for (i = 0; i < 16 - k; ++i) t[k + i] = x[i];
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Moves the 16 bytes of x toward lower indices by k positions,
 * zero-filling the k highest indices.
 *
 * Counts above 16 are clamped to 16, which clears x entirely.
 *
 * @param x operand and destination (16 bytes, updated in place)
 * @param k shift distance in bytes
 */
static void SsePsrldq(uint8_t x[16], unsigned k) {
  uint8_t t[16];
  if (k > 16) k = 16;
  memcpy(t, x + k, 16 - k);
  memset(t + (16 - k), 0, k);
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Extracts 16 bytes from the concatenation [y | x | zeros] at byte
 * offset MIN(k, 32) and stores them in x.
 *
 * The 48-byte scratch buffer holds y at offset 0, x at offset 16 and
 * zeros at offset 32, so offsets of 32 or more yield all zeros.
 *
 * @param x high part of the concatenation and destination (16 bytes)
 * @param y low part of the concatenation (16 bytes, read only)
 * @param k byte offset into the concatenation
 */
static void SsePalignr(uint8_t x[16], const uint8_t y[16], unsigned k) {
  uint8_t t[48];
  memcpy(t, y, 16);
  memcpy(t + 16, x, 16);
  memset(t + 32, 0, 16);
  memcpy(x, t + MIN(k, 32), 16);
}
|
|
|
|
/**
 * Subtracts the eight 16-bit lanes of y from those of x, without
 * clamping.
 *
 * @param x minuend and destination (16 bytes, updated in place)
 * @param y subtrahend (16 bytes, read only)
 */
static void SsePsubw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    Write16(x + i * 2, Read16(x + i * 2) - Read16(y + i * 2));
  }
}
|
|
|
|
/**
 * Adds the eight 16-bit lanes of y to those of x, without clamping.
 *
 * @param x first addend and destination (16 bytes, updated in place)
 * @param y second addend (16 bytes, read only)
 */
static void SsePaddw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    Write16(x + i * 2, Read16(x + i * 2) + Read16(y + i * 2));
  }
}
|
|
|
|
/**
 * Subtracts the four 32-bit lanes of y from those of x, without
 * clamping.
 *
 * @param x minuend and destination (16 bytes, updated in place)
 * @param y subtrahend (16 bytes, read only)
 */
static void SsePsubd(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write32(x + i * 4, Read32(x + i * 4) - Read32(y + i * 4));
  }
}
|
|
|
|
/**
 * Adds the four 32-bit lanes of y to those of x, without clamping.
 *
 * @param x first addend and destination (16 bytes, updated in place)
 * @param y second addend (16 bytes, read only)
 */
static void SsePaddd(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 4; ++i) {
    Write32(x + i * 4, Read32(x + i * 4) + Read32(y + i * 4));
  }
}
|
|
|
|
/**
 * Adds the two 64-bit lanes of y to those of x.
 *
 * @param x first addend and destination (16 bytes, updated in place)
 * @param y second addend (16 bytes, read only)
 */
static void SsePaddq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write64(x + i * 8, Read64(x + i * 8) + Read64(y + i * 8));
  }
}
|
|
|
|
/**
 * Subtracts the two 64-bit lanes of y from those of x.
 *
 * @param x minuend and destination (16 bytes, updated in place)
 * @param y subtrahend (16 bytes, read only)
 */
static void SsePsubq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write64(x + i * 8, Read64(x + i * 8) - Read64(y + i * 8));
  }
}
|
|
|
|
/**
 * Unsigned saturating addition over eight 16-bit lanes; each sum is
 * capped at 65535.
 *
 * @param x first addend and destination (16 bytes, updated in place)
 * @param y second addend (16 bytes, read only)
 */
static void SsePaddusw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    Write16(x + i * 2, MIN(65535, Read16(x + i * 2) + Read16(y + i * 2)));
  }
}
|
|
|
|
/**
 * Packs signed 16-bit lanes into unsigned bytes with saturation.
 *
 * The eight lanes of x fill result bytes 0..7 and the eight lanes of y
 * fill bytes 8..15; each value is clamped to [0, 255].
 *
 * @param x low-half source and destination (16 bytes)
 * @param y high-half source (16 bytes, read only)
 */
static void SsePackuswb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 8; ++i) {
    t[i + 0] = MIN(255, MAX(0, (int16_t)Read16(x + i * 2)));
  }
  for (i = 0; i < 8; ++i) {
    t[i + 8] = MIN(255, MAX(0, (int16_t)Read16(y + i * 2)));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Packs signed 16-bit lanes into signed bytes with saturation.
 *
 * The eight lanes of x fill result bytes 0..7 and the eight lanes of y
 * fill bytes 8..15; each value is clamped to [-128, 127].
 *
 * @param x low-half source and destination (16 bytes)
 * @param y high-half source (16 bytes, read only)
 */
static void SsePacksswb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 8; ++i) {
    t[i + 0] = MAX(-128, MIN(127, (int16_t)Read16(x + i * 2)));
  }
  for (i = 0; i < 8; ++i) {
    t[i + 8] = MAX(-128, MIN(127, (int16_t)Read16(y + i * 2)));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Packs signed 32-bit lanes into signed 16-bit lanes with saturation.
 *
 * The four lanes of x fill result words 0..3 and the four lanes of y
 * fill words 4..7; each value is clamped to [-32768, 32767].
 *
 * @param x low-half source and destination (16 bytes)
 * @param y high-half source (16 bytes, read only)
 */
static void SsePackssdw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2 + 0, MAX(-32768, MIN(32767, (int32_t)Read32(x + i * 4))));
  }
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2 + 8, MAX(-32768, MIN(32767, (int32_t)Read32(y + i * 4))));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Sum of absolute byte differences, computed per 8-byte half.
 *
 * Bytes 0..7 are summed into s and bytes 8..15 into t; s is then
 * written as a 64-bit value at offset 0 and t at offset 8. The second
 * loop continues from the index where the first one stopped.
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePsadbw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i, s, t;
  for (s = i = 0; i < 8; ++i) s += ABS(x[i] - y[i]);
  for (t = 0; i < 16; ++i) t += ABS(x[i] - y[i]);
  Write64(x + 0, s);
  Write64(x + 8, t);
}
|
|
|
|
/**
 * For each 8-byte half, multiplies the low 32-bit lanes of x and y as
 * unsigned values and stores the 64-bit product over that half of x.
 *
 * @param x first operand and destination (16 bytes, updated in place)
 * @param y second operand (16 bytes, read only)
 */
static void SsePmuludq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 2; ++i) {
    Write64(x + i * 8, (uint64_t)Read32(x + i * 8) * Read32(y + i * 8));
  }
}
|
|
|
|
/**
 * Byte shuffle of x controlled by y (16 lanes).
 *
 * For each lane i: if bit 7 of y[i] is set the result byte is 0,
 * otherwise it is x[y[i] & 15]. The result is built in a scratch
 * buffer so every lookup sees the original x.
 *
 * @param x table of source bytes and destination (16 bytes)
 * @param y per-lane selector (16 bytes, read only)
 */
static void SsePshufb(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 16; ++i) {
    t[i] = (y[i] & 128) ? 0 : x[y[i] & 15];
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal add of adjacent 32-bit lanes.
 *
 * Result lanes 0..1 are the pair sums from x and lanes 2..3 the pair
 * sums from y. A scratch buffer keeps the inputs intact until the end.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhaddd(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 2; ++i) {
    Write32(t + i * 4, Read32(x + i * 8) + Read32(x + i * 8 + 4));
    Write32(t + (i + 2) * 4, Read32(y + i * 8) + Read32(y + i * 8 + 4));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal subtract of adjacent 32-bit lanes (even minus odd).
 *
 * Result lanes 0..1 are the pair differences from x and lanes 2..3 the
 * pair differences from y.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhsubd(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 2; ++i) {
    Write32(t + i * 4, Read32(x + i * 8) - Read32(x + i * 8 + 4));
    Write32(t + (i + 2) * 4, Read32(y + i * 8) - Read32(y + i * 8 + 4));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal add of adjacent 16-bit lanes.
 *
 * Result words 0..3 are the pair sums from x and words 4..7 the pair
 * sums from y. A scratch buffer keeps the inputs intact until the end.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhaddw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2, Read16(x + i * 4) + Read16(x + i * 4 + 2));
    Write16(t + (i + 4) * 2, Read16(y + i * 4) + Read16(y + i * 4 + 2));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal subtract of adjacent 16-bit lanes (even minus odd).
 *
 * Result words 0..3 are the pair differences from x and words 4..7 the
 * pair differences from y.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhsubw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2, Read16(x + i * 4) - Read16(x + i * 4 + 2));
    Write16(t + (i + 4) * 2, Read16(y + i * 4) - Read16(y + i * 4 + 2));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal add of adjacent signed 16-bit lanes with saturation to
 * [-32768, 32767].
 *
 * Result words 0..3 come from x pairs and words 4..7 from y pairs.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhaddsw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2,
            MAX(-32768, MIN(32767, ((int16_t)Read16(x + i * 4) +
                                    (int16_t)Read16(x + i * 4 + 2)))));
    Write16(t + (i + 4) * 2,
            MAX(-32768, MIN(32767, ((int16_t)Read16(y + i * 4) +
                                    (int16_t)Read16(y + i * 4 + 2)))));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Horizontal subtract of adjacent signed 16-bit lanes (even minus odd)
 * with saturation to [-32768, 32767].
 *
 * Result words 0..3 come from x pairs and words 4..7 from y pairs.
 *
 * @param x first source and destination (16 bytes)
 * @param y second source (16 bytes, read only)
 */
static void SsePhsubsw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  uint8_t t[16];
  for (i = 0; i < 4; ++i) {
    Write16(t + i * 2,
            MAX(-32768, MIN(32767, ((int16_t)Read16(x + i * 4) -
                                    (int16_t)Read16(x + i * 4 + 2)))));
    Write16(t + (i + 4) * 2,
            MAX(-32768, MIN(32767, ((int16_t)Read16(y + i * 4) -
                                    (int16_t)Read16(y + i * 4 + 2)))));
  }
  memcpy(x, t, 16);
}
|
|
|
|
/**
 * Interleaves the low eight bytes of x and y:
 * x becomes {x0, y0, x1, y1, ..., x7, y7}.
 *
 * Lanes are written from the highest index downward so each source
 * byte of x is read before it is overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpcklbw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 8; i-- > 0;) {
    x[i * 2 + 1] = y[i];
    x[i * 2 + 0] = x[i];
  }
}
|
|
|
|
/**
 * Interleaves the high eight bytes of x and y:
 * x becomes {x8, y8, x9, y9, ..., x15, y15}.
 *
 * Lanes are written from index 0 upward so each source byte of x is
 * read before it is overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpckhbw(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i * 2 + 0] = x[8 + i];
    x[i * 2 + 1] = y[8 + i];
  }
}
|
|
|
|
/**
 * Interleaves the low four 2-byte groups of x and y:
 * x becomes {x0,x1, y0,y1, x2,x3, y2,y3, ..., x6,x7, y6,y7}.
 *
 * Groups are written from the highest index downward so source bytes of
 * x are read before they are overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpcklwd(uint8_t x[16], const uint8_t y[16]) {
  unsigned j;
  for (j = 4; j-- > 0;) {
    x[j * 4 + 2] = y[j * 2 + 0];
    x[j * 4 + 3] = y[j * 2 + 1];
    x[j * 4 + 0] = x[j * 2 + 0];
    x[j * 4 + 1] = x[j * 2 + 1];
  }
}
|
|
|
|
/**
 * Interleaves the low two 4-byte groups of x and y:
 * x becomes {x0..x3, y0..y3, x4..x7, y4..y7}.
 *
 * The highest group is written first so bytes 4..7 of x are copied out
 * before they are overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpckldq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 4; ++i) x[12 + i] = y[4 + i];
  for (i = 0; i < 4; ++i) x[8 + i] = x[4 + i];
  for (i = 0; i < 4; ++i) x[4 + i] = y[i];
}
|
|
|
|
/**
 * Interleaves the high four 2-byte groups of x and y:
 * x becomes {x8,x9, y8,y9, x10,x11, y10,y11, ..., x14,x15, y14,y15}.
 *
 * Groups are written from index 0 upward so source bytes of x are read
 * before they are overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpckhwd(uint8_t x[16], const uint8_t y[16]) {
  unsigned j;
  for (j = 0; j < 4; ++j) {
    x[j * 4 + 0] = x[8 + j * 2 + 0];
    x[j * 4 + 1] = x[8 + j * 2 + 1];
    x[j * 4 + 2] = y[8 + j * 2 + 0];
    x[j * 4 + 3] = y[8 + j * 2 + 1];
  }
}
|
|
|
|
/**
 * Interleaves the high two 4-byte groups of x and y:
 * x becomes {x8..x11, y8..y11, x12..x15, y12..y15}.
 *
 * Groups are written from index 0 upward so source bytes of x are read
 * before they are overwritten.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpckhdq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 4; ++i) x[i] = x[8 + i];
  for (i = 0; i < 4; ++i) x[4 + i] = y[8 + i];
  for (i = 0; i < 4; ++i) x[8 + i] = x[12 + i];
  for (i = 0; i < 4; ++i) x[12 + i] = y[12 + i];
}
|
|
|
|
/**
 * Keeps the low eight bytes of x and copies the low eight bytes of y
 * into the high half: x becomes {x0..x7, y0..y7}.
 *
 * @param x first source and destination (16 bytes, updated in place)
 * @param y second source (16 bytes, read only)
 */
static void SsePunpcklqdq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[8 + i] = y[i];
  }
}
|
|
|
|
/*
 * Moves the high 8 bytes of x into its low 8 bytes, then copies the high
 * 8 bytes of y into the high 8 bytes of x. x ends up as x[8..15], y[8..15]
 * (old values). The low half is filled first, so the old high half of x is
 * consumed before it is replaced.
 */
static void SsePunpckhqdq(uint8_t x[16], const uint8_t y[16]) {
  unsigned i;
  for (i = 0; i < 8; ++i) {
    x[i] = x[8 + i];
  }
  for (i = 0; i < 8; ++i) {
    x[8 + i] = y[8 + i];
  }
}
|
|
|
|
/*
 * 16-byte form of MmxPsrlw: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsrlw(uint8_t x[16], unsigned k) {
  MmxPsrlw(x + 0, k);
  MmxPsrlw(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsraw: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsraw(uint8_t x[16], unsigned k) {
  MmxPsraw(x + 0, k);
  MmxPsraw(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsllw: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsllw(uint8_t x[16], unsigned k) {
  MmxPsllw(x + 0, k);
  MmxPsllw(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrld: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsrld(uint8_t x[16], unsigned k) {
  MmxPsrld(x + 0, k);
  MmxPsrld(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrad: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsrad(uint8_t x[16], unsigned k) {
  MmxPsrad(x + 0, k);
  MmxPsrad(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPslld: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePslld(uint8_t x[16], unsigned k) {
  MmxPslld(x + 0, k);
  MmxPslld(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrlq: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsrlq(uint8_t x[16], unsigned k) {
  MmxPsrlq(x + 0, k);
  MmxPsrlq(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsllq: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, forwarding k unchanged to both calls.
 */
static void SsePsllq(uint8_t x[16], unsigned k) {
  MmxPsllq(x + 0, k);
  MmxPsllq(x + 8, k);
}
|
|
|
|
/*
 * 16-byte form of MmxPsubsb: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsubsb(uint8_t x[16], const uint8_t y[16]) {
  MmxPsubsb(x + 0, y + 0);
  MmxPsubsb(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPaddsb: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePaddsb(uint8_t x[16], const uint8_t y[16]) {
  MmxPaddsb(x + 0, y + 0);
  MmxPaddsb(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsubsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsubsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPsubsw(x + 0, y + 0);
  MmxPsubsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPaddsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePaddsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPaddsw(x + 0, y + 0);
  MmxPaddsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPaddusb: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePaddusb(uint8_t x[16], const uint8_t y[16]) {
  MmxPaddusb(x + 0, y + 0);
  MmxPaddusb(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsubusb: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsubusb(uint8_t x[16], const uint8_t y[16]) {
  MmxPsubusb(x + 0, y + 0);
  MmxPsubusb(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsubusw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsubusw(uint8_t x[16], const uint8_t y[16]) {
  MmxPsubusw(x + 0, y + 0);
  MmxPsubusw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPminsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePminsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPminsw(x + 0, y + 0);
  MmxPminsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmaxsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmaxsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmaxsw(x + 0, y + 0);
  MmxPmaxsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsignb: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsignb(uint8_t x[16], const uint8_t y[16]) {
  MmxPsignb(x + 0, y + 0);
  MmxPsignb(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsignw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsignw(uint8_t x[16], const uint8_t y[16]) {
  MmxPsignw(x + 0, y + 0);
  MmxPsignw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsignd: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePsignd(uint8_t x[16], const uint8_t y[16]) {
  MmxPsignd(x + 0, y + 0);
  MmxPsignd(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmulhrsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmulhrsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmulhrsw(x + 0, y + 0);
  MmxPmulhrsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPabsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePabsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPabsw(x + 0, y + 0);
  MmxPabsw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPabsd: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePabsd(uint8_t x[16], const uint8_t y[16]) {
  MmxPabsd(x + 0, y + 0);
  MmxPabsd(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPcmpgtw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePcmpgtw(uint8_t x[16], const uint8_t y[16]) {
  MmxPcmpgtw(x + 0, y + 0);
  MmxPcmpgtw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPcmpeqw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePcmpeqw(uint8_t x[16], const uint8_t y[16]) {
  MmxPcmpeqw(x + 0, y + 0);
  MmxPcmpeqw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPcmpgtd: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePcmpgtd(uint8_t x[16], const uint8_t y[16]) {
  MmxPcmpgtd(x + 0, y + 0);
  MmxPcmpgtd(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPcmpeqd: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePcmpeqd(uint8_t x[16], const uint8_t y[16]) {
  MmxPcmpeqd(x + 0, y + 0);
  MmxPcmpeqd(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrawv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsrawv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsrawv(x + 0, count);
  MmxPsrawv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsradv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsradv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsradv(x + 0, count);
  MmxPsradv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrlwv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsrlwv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsrlwv(x + 0, count);
  MmxPsrlwv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsllwv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsllwv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsllwv(x + 0, count);
  MmxPsllwv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrldv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsrldv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsrldv(x + 0, count);
  MmxPsrldv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPslldv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePslldv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPslldv(x + 0, count);
  MmxPslldv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsrlqv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsrlqv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsrlqv(x + 0, count);
  MmxPsrlqv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPsllqv: runs the 8-byte kernel on bytes 0..7 and then
 * on bytes 8..15 of x, giving both calls the same 8-byte operand taken
 * from the start of y.
 *
 * The operand is copied out first: if y points at the same storage as x,
 * the first call could otherwise rewrite it before the second call reads
 * it, and the two halves would be processed with different operands.
 * NOTE(review): aliasing is assumed possible when both instruction
 * operands name one register; confirm against the OpSse() operand lookup.
 */
static void SsePsllqv(uint8_t x[16], const uint8_t y[16]) {
  uint8_t count[8];
  unsigned i;
  for (i = 0; i < 8; ++i) {
    count[i] = y[i];
  }
  MmxPsllqv(x + 0, count);
  MmxPsllqv(x + 8, count);
}
|
|
|
|
/*
 * 16-byte form of MmxPavgw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePavgw(uint8_t x[16], const uint8_t y[16]) {
  MmxPavgw(x + 0, y + 0);
  MmxPavgw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmaddwd: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmaddwd(uint8_t x[16], const uint8_t y[16]) {
  MmxPmaddwd(x + 0, y + 0);
  MmxPmaddwd(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmulhuw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmulhuw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmulhuw(x + 0, y + 0);
  MmxPmulhuw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmulhw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmulhw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmulhw(x + 0, y + 0);
  MmxPmulhw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmullw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmullw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmullw(x + 0, y + 0);
  MmxPmullw(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmulld: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmulld(uint8_t x[16], const uint8_t y[16]) {
  MmxPmulld(x + 0, y + 0);
  MmxPmulld(x + 8, y + 8);
}
|
|
|
|
/*
 * 16-byte form of MmxPmaddubsw: runs the 8-byte kernel on the low halves
 * (x, y) and then on the high halves (x + 8, y + 8). x is the mutable
 * operand; y is only read.
 */
static void SsePmaddubsw(uint8_t x[16], const uint8_t y[16]) {
  MmxPmaddubsw(x + 0, y + 0);
  MmxPmaddubsw(x + 8, y + 8);
}
|
|
|
|
static void OpPsb(struct Machine *m, uint32_t rde,
|
|
void MmxKernel(uint8_t[8], unsigned),
|
|
void SseKernel(uint8_t[16], unsigned)) {
|
|
if (Osz(rde)) {
|
|
SseKernel(XmmRexbRm(m, rde), m->xedd->op.uimm0);
|
|
} else {
|
|
MmxKernel(XmmRexbRm(m, rde), m->xedd->op.uimm0);
|
|
}
|
|
}
|
|
|
|
void Op171(struct Machine *m, uint32_t rde) {
|
|
switch (ModrmReg(rde)) {
|
|
case 2:
|
|
OpPsb(m, rde, MmxPsrlw, SsePsrlw);
|
|
break;
|
|
case 4:
|
|
OpPsb(m, rde, MmxPsraw, SsePsraw);
|
|
break;
|
|
case 6:
|
|
OpPsb(m, rde, MmxPsllw, SsePsllw);
|
|
break;
|
|
default:
|
|
OpUd(m, rde);
|
|
}
|
|
}
|
|
|
|
void Op172(struct Machine *m, uint32_t rde) {
|
|
switch (ModrmReg(rde)) {
|
|
case 2:
|
|
OpPsb(m, rde, MmxPsrld, SsePsrld);
|
|
break;
|
|
case 4:
|
|
OpPsb(m, rde, MmxPsrad, SsePsrad);
|
|
break;
|
|
case 6:
|
|
OpPsb(m, rde, MmxPslld, SsePslld);
|
|
break;
|
|
default:
|
|
OpUd(m, rde);
|
|
}
|
|
}
|
|
|
|
void Op173(struct Machine *m, uint32_t rde) {
|
|
switch (ModrmReg(rde)) {
|
|
case 2:
|
|
OpPsb(m, rde, MmxPsrlq, SsePsrlq);
|
|
break;
|
|
case 3:
|
|
OpPsb(m, rde, MmxPsrldq, SsePsrldq);
|
|
break;
|
|
case 6:
|
|
OpPsb(m, rde, MmxPsllq, SsePsllq);
|
|
break;
|
|
case 7:
|
|
OpPsb(m, rde, MmxPslldq, SsePslldq);
|
|
break;
|
|
default:
|
|
OpUd(m, rde);
|
|
}
|
|
}
|
|
|
|
void OpSsePalignr(struct Machine *m, uint32_t rde) {
|
|
if (Osz(rde)) {
|
|
SsePalignr(XmmRexrReg(m, rde), GetModrmRegisterXmmPointerRead16(m, rde),
|
|
m->xedd->op.uimm0);
|
|
} else {
|
|
MmxPalignr(XmmRexrReg(m, rde), GetModrmRegisterXmmPointerRead8(m, rde),
|
|
m->xedd->op.uimm0);
|
|
}
|
|
}
|
|
|
|
/*
 * Runs a two-operand kernel with first operand XmmRexrReg(m, rde) and
 * second operand from the ModR/M lookup.
 *
 * @param m          machine state
 * @param rde        decoded instruction bits
 * @param MmxKernel  8-byte kernel, used with an 8-byte operand read when
 *                   Osz(rde) is zero
 * @param SseKernel  16-byte kernel, used with a 16-byte operand read when
 *                   Osz(rde) is nonzero
 */
static void OpSse(struct Machine *m, uint32_t rde,
                  void MmxKernel(uint8_t[8], const uint8_t[8]),
                  void SseKernel(uint8_t[16], const uint8_t[16])) {
  if (Osz(rde)) {
    SseKernel(XmmRexrReg(m, rde), GetModrmRegisterXmmPointerRead16(m, rde));
  } else {
    MmxKernel(XmmRexrReg(m, rde), GetModrmRegisterXmmPointerRead8(m, rde));
  }
}
|
|
|
|
/* clang-format off */
|
|
void OpSsePunpcklbw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpcklbw, SsePunpcklbw); }
|
|
void OpSsePunpcklwd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpcklwd, SsePunpcklwd); }
|
|
void OpSsePunpckldq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpckldq, SsePunpckldq); }
|
|
void OpSsePacksswb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPacksswb, SsePacksswb); }
|
|
void OpSsePcmpgtb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpgtb, SsePcmpgtb); }
|
|
void OpSsePcmpgtw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpgtw, SsePcmpgtw); }
|
|
void OpSsePcmpgtd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpgtd, SsePcmpgtd); }
|
|
void OpSsePackuswb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPackuswb, SsePackuswb); }
|
|
void OpSsePunpckhbw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpckhbw, SsePunpckhbw); }
|
|
void OpSsePunpckhwd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpckhwd, SsePunpckhwd); }
|
|
void OpSsePunpckhdq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpckhdq, SsePunpckhdq); }
|
|
void OpSsePackssdw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPackssdw, SsePackssdw); }
|
|
void OpSsePunpcklqdq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpcklqdq, SsePunpcklqdq); }
|
|
void OpSsePunpckhqdq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPunpckhqdq, SsePunpckhqdq); }
|
|
void OpSsePcmpeqb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpeqb, SsePcmpeqb); }
|
|
void OpSsePcmpeqw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpeqw, SsePcmpeqw); }
|
|
void OpSsePcmpeqd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPcmpeqd, SsePcmpeqd); }
|
|
void OpSsePsrlwv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsrlwv, SsePsrlwv); }
|
|
void OpSsePsrldv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsrldv, SsePsrldv); }
|
|
void OpSsePsrlqv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsrlqv, SsePsrlqv); }
|
|
void OpSsePaddq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddq, SsePaddq); }
|
|
void OpSsePmullw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmullw, SsePmullw); }
|
|
void OpSsePsubusb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubusb, SsePsubusb); }
|
|
void OpSsePsubusw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubusw, SsePsubusw); }
|
|
void OpSsePminub(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPminub, SsePminub); }
|
|
void OpSsePand(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPand, SsePand); }
|
|
void OpSsePaddusb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddusb, SsePaddusb); }
|
|
void OpSsePaddusw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddusw, SsePaddusw); }
|
|
void OpSsePmaxub(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmaxub, SsePmaxub); }
|
|
void OpSsePandn(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPandn, SsePandn); }
|
|
void OpSsePavgb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPavgb, SsePavgb); }
|
|
void OpSsePsrawv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsrawv, SsePsrawv); }
|
|
void OpSsePsradv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsradv, SsePsradv); }
|
|
void OpSsePavgw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPavgw, SsePavgw); }
|
|
void OpSsePmulhuw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmulhuw, SsePmulhuw); }
|
|
void OpSsePmulhw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmulhw, SsePmulhw); }
|
|
void OpSsePsubsb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubsb, SsePsubsb); }
|
|
void OpSsePsubsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubsw, SsePsubsw); }
|
|
void OpSsePminsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPminsw, SsePminsw); }
|
|
void OpSsePor(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPor, SsePor); }
|
|
void OpSsePaddsb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddsb, SsePaddsb); }
|
|
void OpSsePaddsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddsw, SsePaddsw); }
|
|
void OpSsePmaxsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmaxsw, SsePmaxsw); }
|
|
void OpSsePxor(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPxor, SsePxor); }
|
|
void OpSsePsllwv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsllwv, SsePsllwv); }
|
|
void OpSsePslldv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPslldv, SsePslldv); }
|
|
void OpSsePsllqv(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsllqv, SsePsllqv); }
|
|
void OpSsePmuludq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmuludq, SsePmuludq); }
|
|
void OpSsePmaddwd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmaddwd, SsePmaddwd); }
|
|
void OpSsePsadbw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsadbw, SsePsadbw); }
|
|
void OpSsePsubb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubb, SsePsubb); }
|
|
void OpSsePsubw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubw, SsePsubw); }
|
|
void OpSsePsubd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubd, SsePsubd); }
|
|
void OpSsePsubq(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsubq, SsePsubq); }
|
|
void OpSsePaddb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddb, SsePaddb); }
|
|
void OpSsePaddw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddw, SsePaddw); }
|
|
void OpSsePaddd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPaddd, SsePaddd); }
|
|
void OpSsePshufb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPshufb, SsePshufb); }
|
|
void OpSsePhaddw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhaddw, SsePhaddw); }
|
|
void OpSsePhaddd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhaddd, SsePhaddd); }
|
|
void OpSsePhaddsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhaddsw, SsePhaddsw); }
|
|
void OpSsePmaddubsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmaddubsw, SsePmaddubsw); }
|
|
void OpSsePhsubw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhsubw, SsePhsubw); }
|
|
void OpSsePhsubd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhsubd, SsePhsubd); }
|
|
void OpSsePhsubsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPhsubsw, SsePhsubsw); }
|
|
void OpSsePsignb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsignb, SsePsignb); }
|
|
void OpSsePsignw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsignw, SsePsignw); }
|
|
void OpSsePsignd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPsignd, SsePsignd); }
|
|
void OpSsePmulhrsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmulhrsw, SsePmulhrsw); }
|
|
void OpSsePabsb(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPabsb, SsePabsb); }
|
|
void OpSsePabsw(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPabsw, SsePabsw); }
|
|
void OpSsePabsd(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPabsd, SsePabsd); }
|
|
void OpSsePmulld(struct Machine *m, uint32_t rde) { OpSse(m, rde, MmxPmulld, SsePmulld); }
|
|
/* clang-format on */
|