mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
f4f4caab0e
I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how the GCC generated code is going about manipulating matrices and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurances. I wanted a single binary that'll run on all major operating systems, with an embedded GPL barrier ZIP filesystem, that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
96 lines
5.7 KiB
C
96 lines
5.7 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ This program is free software; you can redistribute it and/or modify │
|
|
│ it under the terms of the GNU General Public License as published by │
|
|
│ the Free Software Foundation; version 2 of the License. │
|
|
│ │
|
|
│ This program is distributed in the hope that it will be useful, but │
|
|
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
|
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
|
│ General Public License for more details. │
|
|
│ │
|
|
│ You should have received a copy of the GNU General Public License │
|
|
│ along with this program; if not, write to the Free Software │
|
|
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
|
│ 02110-1301 USA │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/str/internal.h"
|
|
|
|
/**
 * Hashes data with hardware acceleration at 10GBps.
 *
 * Computes CRC-32C using the x86 `crc32` instruction. The running
 * state is bit-inverted on entry and again on exit, so the value
 * returned for one chunk can be passed back as `init` to continue
 * the checksum over the next chunk.
 *
 * @param init is 0 to start a new checksum, or a previous return value
 * @param data points to the bytes being hashed
 * @param n is the number of bytes at data
 * @return updated checksum; equals init when n is 0
 * @note needs Nehalem+ c. 2008 or Bulldozer+ c. 2011
 */
uint32_t crc32c$sse42(uint32_t init, const void *data, size_t n) {
  /* Reading caller bytes through a plain uint64_t lvalue would violate
     strict aliasing; a may_alias type makes the wide loads well-defined
     without changing the generated instructions. */
  typedef uint64_t aliased_u64 __attribute__((__may_alias__));
  const unsigned char *p = (const unsigned char *)data;
  const unsigned char *pe = p + n;
  uint32_t h = init ^ 0xffffffff;
  if (n >= 16 + 8) {
    /* Step bytewise up to an 8-byte boundary. This consumes at most 7
       bytes, so at least 17 remain and `pe - 16` stays in the buffer. */
    while ((uintptr_t)p & 7) {
      __asm__("crc32b\t%1,%0" : "+r"(h) : "rm"(*p++));
    }
    /* The 64-bit form of the instruction takes a 64-bit accumulator. */
    uint64_t hl = h;
    /* Two aligned words per iteration while more than 16 bytes remain. */
    while (p < pe - 16ul) {
      __asm__("crc32q\t%1,%0" : "+r"(hl) : "rm"(*(const aliased_u64 *)p));
      p += 8;
      __asm__("crc32q\t%1,%0" : "+r"(hl) : "rm"(*(const aliased_u64 *)p));
      p += 8;
    }
    h = (uint32_t)hl;
  }
  /* Short inputs and whatever tail the wide loop left behind. */
  while (p < pe) {
    __asm__("crc32b\t%1,%0" : "+r"(h) : "rm"(*p++));
  }
  return h ^ 0xffffffff;
}
|
|
|
|
/*
|
|
bench_crc32c$sse42 for #c per n where c ≈ 0.293ns
|
|
N x1 x8 x64 mBps
|
|
------------------------------------------------------------
|
|
1 877.000 43.375 40.359 81
|
|
1 45.000 39.625 40.484 80
|
|
2 34.500 27.562 20.461 159
|
|
3 23.000 16.708 14.245 228
|
|
4 18.250 13.094 11.449 284
|
|
7 10.429 8.339 8.185 397
|
|
8 42.125 8.734 6.850 475
|
|
15 9.400 5.375 4.884 665
|
|
16 7.312 5.070 4.882 666
|
|
31 5.258 2.923 2.680 1213
|
|
32 3.969 2.676 2.562 1269
|
|
63 3.095 1.581 1.428 2276
|
|
64 2.234 1.623 1.478 2199
|
|
127 1.205 0.901 0.900 3610
|
|
128 1.164 0.960 0.915 3552
|
|
255 0.922 0.651 0.618 5260
|
|
256 0.715 0.650 0.609 5341
|
|
511 0.558 0.482 0.477 6819
|
|
512 0.529 0.475 0.469 6932
|
|
1023 0.425 0.400 0.396 8204
|
|
1024 0.417 0.392 0.388 8383
|
|
2047 0.367 0.355 0.353 9199
|
|
2048 0.374 0.366 0.364 8929
|
|
4095 0.351 0.338 0.337 9644
|
|
4096 0.353 0.338 0.338 9624
|
|
8191 0.335 0.338 0.337 9641
|
|
8192 0.335 0.329 0.329 9870
|
|
16383 0.336 0.325 0.325 10011
|
|
16384 0.336 0.326 0.375 8666
|
|
32767 0.329 0.323 0.323 10070
|
|
32768 0.327 0.324 0.323 10062
|
|
65535 0.322 0.322 0.322 10103
|
|
65536 0.321 0.322 0.322 10102
|
|
131071 0.322 0.321 0.321 10125
|
|
131072 0.321 0.321 0.321 10124
|
|
262143 0.322 0.321 0.335 9699
|
|
262144 0.321 0.321 0.321 10134
|
|
524287 0.321 0.321 0.499 6516
|
|
524288 0.321 0.321 0.339 9575
|
|
1048575 0.322 0.321 0.322 10095
|
|
1048576 0.320 1.001 0.323 10048
|
|
2097151 0.325 0.321 0.322 10086
|
|
2097152 0.330 0.320 0.323 10076
|
|
4194303 0.331 0.322 0.321 10128
|
|
4194304 0.332 0.321 0.325 10004
|
|
8388607 0.334 0.332 0.331 9829
|
|
8388608 0.334 0.329 0.327 9934
|
|
*/
|