mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-28 13:30:29 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
186
libc/nexgen32e/strsak16.S
Normal file
186
libc/nexgen32e/strsak16.S
Normal file
|
@ -0,0 +1,186 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to character, or to NUL word if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
strchrnul16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
or $-1,%r9
|
||||
jmp 0f
|
||||
.endfn strchrnul16,globl
|
||||
|
||||
/ Returns pointer to first instance of character.
|
||||
/
|
||||
/ @param rdi is a non-null NUL-terminated char16_t string pointer
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to first result, or NULL if not found
|
||||
/ @note this won't return NULL if search character is NUL
|
||||
/ @asyncsignalsafe
|
||||
strchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %r9,%r9
|
||||
0: mov %esi,%edx
|
||||
xor %r11d,%r11d
|
||||
or $-1,%rsi
|
||||
xor %r8,%r8
|
||||
jmp strsak16
|
||||
.endfn strchr16,globl
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the search word
|
||||
/ @return rax points to word if found, or else undefined behavior
|
||||
rawmemchr16:
|
||||
or $-1,%rdx
|
||||
/ fallthrough
|
||||
.endfn rawmemchr16,globl
|
||||
|
||||
/ Returns pointer to first instance of character in range.
|
||||
/
|
||||
/ @param rdi is a non-null pointer to memory
|
||||
/ @param esi is the search word
|
||||
/ @param rdx is length of memory in shorts
|
||||
/ @return rax points to word if found or NULL
|
||||
/ @asyncsignalsafe
|
||||
memchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
xchg %rsi,%rdx
|
||||
mov %edx,%r11d
|
||||
xor %r8,%r8
|
||||
xor %r10,%r10
|
||||
jmp strsak16
|
||||
.endfn memchr16,globl
|
||||
|
||||
/ Returns length of char16_t string w/ security blankets.
|
||||
/
|
||||
/ This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
/ or (2) a NUL-terminator wasn't found in the first RSI shorts.
|
||||
/
|
||||
/ @param rdi is a nullable NUL-terminated char16_t string pointer
|
||||
/ @param rsi is the maximum number of shorts to consider
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
strnlen16_s:
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %eax,%eax
|
||||
xor %r10d,%r10d
|
||||
test %rdi,%rdi
|
||||
jnz 0f
|
||||
.leafepilogue
|
||||
.endfn strnlen16_s,globl
|
||||
|
||||
/ Returns length of NUL-terminated char16_t string.
|
||||
/
|
||||
/ @param rdi is non-null NUL-terminated char16_t string pointer
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
strlen16:
|
||||
or $-1,%rsi
|
||||
/ fallthrough
|
||||
.endfn strlen16,globl
|
||||
|
||||
/ Returns length of NUL-terminated memory, with limit.
|
||||
/
|
||||
/ @param rdi is non-null memory
|
||||
/ @param rsi is the maximum number of shorts to consider
|
||||
/ @return rax is the number of shorts, excluding the NUL
|
||||
/ @asyncsignalsafe
|
||||
strnlen16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
or $-1,%r10
|
||||
0: xor %edx,%edx
|
||||
xor %r11d,%r11d
|
||||
mov %rdi,%r8
|
||||
/ fallthrough
|
||||
.endfn strnlen16,globl
|
||||
|
||||
/ Swiss Army Knife of string char16_t scanning.
|
||||
/ Sixteen fast functions in one.
|
||||
/
|
||||
/ @param rdi is non-null string memory
|
||||
/ @param rsi is max number of shorts to consider
|
||||
/ @param dx is search character #1
|
||||
/ @param r11w is search character #2
|
||||
/ @param r8 is subtracted from result (for length vs. pointer)
|
||||
/ @param r9 masks result if DH is found (for NUL vs. NULL)
|
||||
/ @param r10 masks result on shorts exhausted (for length v. NULL)
|
||||
/ @return rax end pointer after r8/r9/r10 modifications
|
||||
strsak16:
|
||||
lea -2(%rdi),%rax
|
||||
1: add $2,%rax
|
||||
sub $1,%rsi
|
||||
jb .Lend
|
||||
test $31,%al
|
||||
jz .Lfast
|
||||
.Lword: mov (%rax),%cx
|
||||
cmp %cx,%dx
|
||||
je .Ldone
|
||||
cmp %cx,%r11w
|
||||
je .Lnul
|
||||
jmp 1b
|
||||
.Ldone: sub %r8,%rax
|
||||
jmp .Lret
|
||||
.Lend: mov %r10,%r9
|
||||
.Lnul: sub %r8,%rax
|
||||
and %r9,%rax
|
||||
.Lret: test %r8,%r8
|
||||
jz 0f
|
||||
shr %rax
|
||||
0: .leafepilogue
|
||||
.Lslow: add $32,%rsi
|
||||
jmp .Lword
|
||||
.Lfast:
|
||||
#if !X86_NEED(AVX2)
|
||||
testb X86_HAVE(AVX2)+kCpuids(%rip)
|
||||
jz .Lword
|
||||
#endif
|
||||
movzwl %dx,%ecx
|
||||
movd %ecx,%xmm0
|
||||
movzwl %r11w,%ecx
|
||||
movd %ecx,%xmm1
|
||||
vpbroadcastw %xmm0,%ymm0
|
||||
vpbroadcastw %xmm1,%ymm1
|
||||
sub $32,%rax
|
||||
1: add $32,%rax
|
||||
sub $16,%rsi
|
||||
jb .Lslow
|
||||
vmovdqa (%rax),%ymm2
|
||||
vpcmpeqw %ymm0,%ymm2,%ymm3
|
||||
vpcmpeqw %ymm1,%ymm2,%ymm2
|
||||
vpor %ymm3,%ymm2,%ymm2
|
||||
vpmovmskb %ymm2,%ecx
|
||||
bsf %ecx,%ecx
|
||||
je 1b
|
||||
vzeroupper
|
||||
add %rcx,%rax
|
||||
jmp .Lword
|
||||
.endfn strsak16
|
||||
.yoink __FILE__
|
Loading…
Add table
Add a link
Reference in a new issue