mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
f4f4caab0e
I wanted a tiny, scriptable, meltdown-proof way to run userspace programs and visualize how program execution impacts memory. It helps to explain how things like Actually Portable Executable work. It can show you how the GCC generated code is going about manipulating matrices and more. I didn't feel fully comfortable with Qemu and Bochs because I'm not smart enough to understand them. I wanted something like gVisor but with much stronger levels of assurances. I wanted a single binary that'll run on all major operating systems, with an embedded GPL barrier ZIP filesystem, that is tiny enough to transpile to JavaScript and run in browsers too. https://justine.storage.googleapis.com/emulator625.mp4
101 lines
4.5 KiB
ArmAsm
101 lines
4.5 KiB
ArmAsm
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||
│ │
|
||
│ This program is free software; you can redistribute it and/or modify │
|
||
│ it under the terms of the GNU General Public License as published by │
|
||
│ the Free Software Foundation; version 2 of the License. │
|
||
│ │
|
||
│ This program is distributed in the hope that it will be useful, but │
|
||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||
│ General Public License for more details. │
|
||
│ │
|
||
│ You should have received a copy of the GNU General Public License │
|
||
│ along with this program; if not, write to the Free Software │
|
||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||
│ 02110-1301 USA │
|
||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||
#include "libc/nexgen32e/macros.h"
|
||
#include "libc/nexgen32e/x86feature.h"
|
||
#include "libc/macros.h"
|
||
|
||
/	Mutates string to uppercase roman characters.
/
/	Loads the conversion parameters expected by strcaseconv (the
/	byte range to match and the delta to add) and tail-jumps to it,
/	so the in-place rewrite and the return value come from there.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtoupper:
	mov	$'A-'a,%edx		# DL = -32; adding this uppers
	mov	$'a|'z<<8,%ecx		# CL = range start, CH = range end (a..z)
	jmp	strcaseconv		# tail jump; no frame of our own
	.endfn	strtoupper,globl
|
||
|
||
/	Mutates string to lowercase roman characters.
/
/	Loads the conversion parameters expected by strcaseconv (the
/	byte range to match and the delta to add). There is no jump at
/	the end: execution slides straight into strcaseconv, so that
/	routine must remain the very next code emitted after this one.
/
/	@param	RDI points to non-const NUL-terminated string
/	@return	RAX will be original RDI
/	@note	10x faster than C
strtolower:
	mov	$'a-'A,%edx		# DL = +32; adding this lowers
	mov	$'A|'Z<<8,%ecx		# CL = range start, CH = range end (A..Z)
/	𝑠𝑙𝑖𝑑𝑒
	.endfn	strtolower,globl
|
||
|
||
/	Support code for strtolower() and strtoupper().
/
/	The string is rewritten in place. Bytes are handled one at a
/	time until the cursor is 16-byte aligned (MOVDQA needs that);
/	from there, when SSE4.2 is usable, a PCMPISTRM loop converts
/	sixteen bytes per iteration. Without SSE4.2 the byte loop runs
/	all the way to the NUL terminator.
/
/	@param	RDI points to non-const NUL-terminated string
/	@param	CL defines start of character range to mutate
/	@param	CH defines end of character range to mutate
/	@param	DL is added to each string byte ∈ [CL,CH]
/	@return	RAX will be original RDI
/	@note	overwrites RSI, RCX, AL, XMM0..XMM3 and the flags, plus
/		anything the pbroadcastb macro uses internally
strcaseconv:
	.leafprologue
	.profilable
	mov	%rdi,%rsi		# RSI = cursor; RDI is kept for return
0:	testb	$15,%sil		# is it aligned?
#if X86_NEED(SSE4_2)
	jz	.Lsse4
#else
	jnz	1f
	testb	X86_HAVE(SSE4_2)+kCpuids(%rip)
	jnz	.Lsse4			# is it nehalem?
#endif
1:	lodsb				# AL = *RSI++
	test	%al,%al			# is it NUL?
	jz	3f
	cmp	%cl,%al			# is it in range?
	jb	0b
	cmp	%ch,%al
	ja	0b
	add	%dl,-1(%rsi)		# patch the byte LODSB just stepped over
	jmp	0b
.Lsse4:	movd	%ecx,%xmm1		# XMM1 = [CL,CH,0,0,...] one range pair
	movd	%edx,%xmm2		# XMM2 = [DL,DL,...] once broadcast
	pbroadcastb %xmm2
	xor	%ecx,%ecx		# RCX = byte offset from RSI
2:	movdqa	(%rsi,%rcx),%xmm3
/	              ┌─0:index of the LEAST significant, set, bit is used
/	              │   regardless of corresponding input element validity
/	              │   intres2 is returned in least significant bits of xmm0
/	              ├─1:index of the MOST significant, set, bit is used
/	              │   regardless of corresponding input element validity
/	              │   each bit of intres2 is expanded to byte/word
/	              │┌─0:negation of intres1 is for all 16 (8) bits
/	              │├─1:negation of intres1 is masked by reg/mem validity
/	              ││┌─intres1 is negated (1’s complement)
/	              │││┌─mode{equalany,ranges,equaleach,equalordered}
/	              ││││ ┌─issigned
/	              ││││ │┌─is16bit
/	             u│││├┐││
	pcmpistrm $0b01000100,%xmm3,%xmm1	# →XMM0 8-bit byte mask
	pand	%xmm2,%xmm0		# won't mask after NUL
	paddb	%xmm0,%xmm3
	movdqa	%xmm3,(%rsi,%rcx)
	lea	16(%rcx),%rcx		# LEA so PCMPISTRM flags survive
	jnz	2b			# loop until PCMPISTRM saw NUL (ZF=1)
3:	mov	%rdi,%rax
	.leafepilogue
	.endfn	strcaseconv
|
||
.source __FILE__
|