Add minor improvements and cleanup

This commit is contained in:
Justine Tunney 2020-10-27 03:39:46 -07:00
parent 9e3e985ae5
commit feed0d2b0e
163 changed files with 2286 additions and 2245 deletions

View file

@ -25,11 +25,10 @@
* @param s is a NUL-terminated string
* @param suffix is also NUL-terminated
*/
bool(endswith)(const char *s, const char *suffix) {
size_t l1, l2;
if (s == suffix) return true;
l1 = strlen(s);
l2 = strnlen(suffix, l1);
if (l2 > l1) return false;
return memcmp(s + (l1 - l2) * sizeof(char), suffix, l2 * sizeof(char)) == 0;
/* Returns true if NUL-terminated s ends with NUL-terminated suffix;
   an empty suffix always matches. */
bool endswith(const char *s, const char *suffix) {
  size_t slen = strlen(s);
  size_t sufflen = strlen(suffix);
  /* a suffix longer than the string can never match */
  return sufflen <= slen && !memcmp(s + (slen - sufflen), suffix, sufflen);
}

118
libc/str/escapedos.c Normal file
View file

@ -0,0 +1,118 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/str/str.h"
/**
 * Tells whether c is one of the characters escapedos() prefixes with a
 * caret, namely: " & % ^ < > |
 */
static textwindows bool shouldescapedos(const char16_t c) {
  switch (c) {
    case u'"':
    case u'&':
    case u'%':
    case u'^':
    case u'<':
    case u'>':
    case u'|':
      return true;
    default:
      return false;
  }
}

/**
 * Tells whether the presence of c forces escapedos() to wrap its output
 * in double quotes: space, tab, newline, vertical tab, asterisk, or any
 * character accepted by shouldescapedos().
 */
static textwindows bool shouldquotedos(const char16_t c) {
  switch (c) {
    case u' ':
    case u'\t':
    case u'\n':
    case u'\v':
    case u'*':
      return true;
    default:
      return shouldescapedos(c); /* also covers the double quote */
  }
}

/**
 * Escapes command so DOS can run it.
 *
 * If no character of the input needs quoting, the input is copied to
 * the output unchanged. Otherwise the output is wrapped in double
 * quotes, backslash runs that precede a quote or the closing quote are
 * doubled, and embedded quotes are backslash-escaped. If the input
 * contains any character accepted by shouldescapedos(), then every
 * quote, backslash, and such character is additionally prefixed with a
 * caret.
 *
 * @param buffer receives the NUL-terminated result
 * @param buflen is capacity of buffer in char16_t cells, including NUL
 * @param unquoted is the input text; only the first len cells are read
 * @param len is number of cells in unquoted, excluding any terminator
 * @return true on success, or false if the result (including its NUL)
 *     wouldn't fit in buflen cells, in which case buffer is untouched
 * @see Iain Patterson's NSSM for original code in public domain
 */
textwindows bool escapedos(char16_t *buffer, unsigned buflen,
                           const char16_t *unquoted, unsigned len) {
  bool escape, quotes;
  size_t need, unit, j, k;
  unsigned i, n;
  char16_t *s;

  /* written as >= so that buflen == 0 can't wrap around to UINT_MAX */
  if (len >= buflen) return false;

  escape = quotes = false;
  for (i = 0; i < len; i++) {
    if (shouldescapedos(unquoted[i])) {
      escape = quotes = true;
      break;
    }
    if (shouldquotedos(unquoted[i])) quotes = true;
  }

  if (!quotes) {
    /* memmove tolerates buffer overlapping unquoted; the terminator is
       written explicitly rather than trusting unquoted[len] to be NUL */
    memmove(buffer, unquoted, len * sizeof(char16_t));
    buffer[len] = u'\0';
    return true;
  }

  /* first pass: count exactly the cells the second pass will emit */
  unit = escape ? 2 : 1; /* a quote or backslash gains a caret if escaping */
  need = 2 * unit;       /* opening and closing quote */
  for (i = 0;; i++) {
    for (n = 0; i != len && unquoted[i] == u'\\'; i++) n++;
    if (i == len) {
      need += 2 * (size_t)n * unit;
      break;
    }
    if (unquoted[i] == u'"') {
      need += (2 * (size_t)n + 1) * unit;
    } else {
      need += (size_t)n * unit;
    }
    need += (escape && shouldescapedos(unquoted[i])) ? 2 : 1;
  }
  if (need >= buflen) return false; /* need cells plus the NUL must fit */

  /* second pass: emit */
  s = buffer;
  if (escape) *s++ = u'^';
  *s++ = u'"';
  for (i = 0;; i++) {
    /* n counts the run of backslashes starting at i */
    for (n = 0; i != len && unquoted[i] == u'\\'; i++) n++;
    if (i == len) {
      k = 2 * (size_t)n; /* run precedes the closing quote: double it */
    } else if (unquoted[i] == u'"') {
      k = 2 * (size_t)n + 1; /* double the run, then escape the quote */
    } else {
      k = n; /* backslashes before an ordinary character stay as-is */
    }
    for (j = 0; j < k; j++) {
      if (escape) *s++ = u'^';
      *s++ = u'\\';
    }
    if (i == len) break;
    if (escape && shouldescapedos(unquoted[i])) *s++ = u'^';
    *s++ = unquoted[i];
  }
  if (escape) *s++ = u'^';
  *s++ = u'"';
  *s++ = u'\0';
  return true;
}

View file

@ -20,11 +20,11 @@
#include "libc/str/str.h"
int getkvlin(const char *name, const char *const unsorted[]) {
unsigned i, n;
if (unsorted) {
unsigned namelen = strlen(name);
for (int i = 0; unsorted[i]; ++i) {
if (strncmp(unsorted[i], name, namelen) == 0 &&
unsorted[i][namelen] == '=') {
n = strlen(name);
for (i = 0; unsorted[i]; ++i) {
if (strncmp(unsorted[i], name, n) == 0 && unsorted[i][n] == '=') {
return i;
}
}

View file

@ -19,11 +19,14 @@
*/
#include "libc/str/str.h"
const char *indexdoublenulstring(const char *p, unsigned i) {
const char *IndexDoubleNulString(const char *s, unsigned i) {
size_t n;
while (i--) {
const char *p2 = rawmemchr(p, '\0');
if (p2 == p) return NULL;
p = p2 + 1;
if ((n = strlen(s))) {
s += n + 1;
} else {
return NULL;
}
}
return p;
return s;
}

View file

@ -26,27 +26,12 @@
hidden extern const uint32_t kSha256Tab[64];
extern const struct TpEncode {
uint8_t mark;
uint8_t len;
} kTpDecoderRing[32];
forceinline struct TpEncode UseTpDecoderRing(wint_t c) {
unsigned msb;
if (c) {
asm("bsr\t%1,%0" : "=r"(msb) : "rm"(c) : "cc");
} else {
msb = 0;
}
return kTpDecoderRing[msb];
}
nodebuginfo forceinline bool32 ismoar(wint_t c) {
return (c & 0b11000000) == 0b11000000;
return (c & 0300) == 0300;
}
nodebuginfo forceinline bool32 iscont(wint_t c) {
return (c & 0b11000000) == 0b10000000;
return (c & 0300) == 0200;
}
char *strstr$sse42(const char *, const char *) strlenesque hidden;

View file

@ -17,6 +17,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/str/str.h"
int isascii(int c) {
return 0x00 <= c && c <= 0x7F;

View file

@ -1,64 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
/**
 * Checks if memory address contains non-plain text.
 *
 * A datum counts as non-text when its value lies in [0,4), i.e. it is
 * NUL, SOH, STX, or ETX.
 *
 * @param data points to memory that's interpreted as char
 * @param size is usually strlen(data) and provided by caller
 * @return NULL if plain text, or pointer to first non-text datum
 * @type char may be 6/7/8/16/32/64-bit signed/unsigned single/multi
 * @author Justine Alexandra Roberts Tunney <jtunney@gmail.com>
 * @see ASA X3.4, ISO/IEC 646, ITU T.50, ANSI X3.64-1979
 * @perf 27gBps on i7-6700 w/ -O3 -mavx2
 * @cost 143 bytes of code w/ -Os
 */
void *isnotplaintext(const void *data, size_t size) {
  /*
   * ASCII, EBCDIC, UNICODE, ISO IR-67, etc. all agree upon the
   * encoding of the NUL, SOH, STX, and ETX characters due to a
   * longstanding human tradition of using them for the purpose
   * of delimiting text from non-text, b/c fixed width integers
   * makes their presence in binary formats nearly unavoidable.
   */
#define isnotplain(C) (0 <= (C) && (C) < 4)
  char no;
  unsigned i;
  const char *p, *pe;
  if (CHAR_BIT > 6) {
    p = (const char *)data;
    pe = p + size;
    /* go one datum at a time until p sits on a 32-byte boundary */
    for (; ((intptr_t)p & 31) && p < pe; ++p) {
      if (isnotplain(*p)) return (void *)p;
    }
    /* branch-free scan of whole 64-datum blocks; the remaining length
       is compared instead of forming p + 64, which may not point more
       than one past the end of the object */
    for (; pe - p >= 64; p += 64) {
      no = 0;
      for (i = 0; i < 64; ++i) {
        no |= isnotplain(p[i]);
      }
      if (no & 1) break; /* hit somewhere in block; loop below finds it */
    }
    /* locate the exact datum within the flagged block, or scan the tail */
    for (; p < pe; ++p) {
      if (isnotplain(*p)) return (void *)p;
    }
  }
  return 0;
#undef isnotplain
}

View file

@ -21,11 +21,11 @@
/**
* Returns true if s has prefix.
*
* @param s is a NUL-terminated string
* @param prefix is also NUL-terminated
*/
bool(startswith)(const char *s, const char *prefix) {
if (s == prefix) return true;
bool startswith(const char *s, const char *prefix) {
for (;;) {
if (!*prefix) return true;
if (!*s) return false;

View file

@ -17,20 +17,45 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/pcmpeqb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/str/str.h"
/**
* Copies string and advances destination pointer.
* Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
*
* @param 𝑑 is destination memory
* @param 𝑠 is a NUL-terminated string
* @note 𝑑 and 𝑠 can't overlap
* @return pointer to nul byte
* @see strcpy(), memccpy()
* @asyncsignalsafe
*/
char *stpcpy(char *dst, const char *src) {
char c;
for (;;) {
c = *src;
*dst = c;
if (!c) break;
++src;
++dst;
char *stpcpy(char *d, const char *s) {
size_t i;
uint8_t v1[16], v2[16], vz[16];
i = 0;
while (((uintptr_t)(s + i) & 15)) {
if (!(d[i] = s[i])) {
return d + i;
}
++i;
}
for (;;) {
memset(vz, 0, 16);
memcpy(v1, s + i, 16);
pcmpeqb(v2, v1, vz);
if (!pmovmskb(v2)) {
memcpy(d + i, v1, 16);
i += 16;
} else {
break;
}
}
for (;;) {
if (!(d[i] = s[i])) {
return d + i;
}
++i;
}
return dst;
}

View file

@ -30,8 +30,7 @@ int tolower(int);
int ispunct(int);
int toupper(int);
int hextoint(int);
void *isnotplaintext(const void *, size_t) nothrow nocallback nosideeffect;
int cescapec(int);
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § characters » thompson-pike encoding
@ -173,7 +172,7 @@ bool wcsstartswith(const wchar_t *, const wchar_t *) strlenesque;
bool endswith(const char *, const char *) strlenesque;
bool endswith16(const char16_t *, const char16_t *) strlenesque;
bool wcsendswith(const wchar_t *, const wchar_t *) strlenesque;
const char *indexdoublenulstring(const char *, unsigned) strlenesque;
const char *IndexDoubleNulString(const char *, unsigned) strlenesque;
int getkvlin(const char *, const char *const[]);
wchar_t *wmemset(wchar_t *, wchar_t, size_t) memcpyesque;
char16_t *memset16(char16_t *, char16_t, size_t) memcpyesque;
@ -194,6 +193,8 @@ char *chomp(char *);
char16_t *chomp16(char16_t *);
wchar_t *wchomp(wchar_t *);
bool escapedos(char16_t *, unsigned, const char16_t *, unsigned);
/*───────────────────────────────────────────────────────────────────────────│─╗
cosmopolitan § strings » multibyte
*/
@ -341,27 +342,6 @@ extern int (*const hook$wcsncmp)(const wchar_t *, const wchar_t *, size_t);
: strncasecmp16, default \
: strncasecmp)(s1, s2, n)
#define startswith(s, c) \
_Generic(*(s), wchar_t \
: wcsstartswith, char16_t \
: startswith16, default \
: startswith)(s, c)
#define endswith(s, c) \
_Generic(*(s), wchar_t \
: wcsendswith, char16_t \
: endswith16, default \
: endswith)(s, c)
#define strclen(s) \
_Generic(*(s), wchar_t : wcslen, char16_t : strclen16, default : strclen)(s)
#define strnclen(s, n) \
_Generic(*(s), wchar_t \
: wcslen, char16_t \
: strnclen16, default \
: strnclen)(s, n)
#define chomp(s) \
_Generic(*(s), wchar_t : wchomp, char16_t : chomp16, default : chomp)(s)

View file

@ -28,6 +28,7 @@ LIBC_STR_A_CHECKS = \
$(LIBC_STR_A_HDRS:%=o/$(MODE)/%.ok)
LIBC_STR_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_STUBS \
LIBC_NEXGEN32E

View file

@ -17,23 +17,23 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/str/internal.h"
#include "libc/str/str.h"
/**
* Returns number of characters in UTF-8 string.
*/
size_t(strclen)(const char *s) { return strnclen(s, -1ull); }
size_t strclen(const char *s) {
return strnclen(s, -1);
}
noinline size_t(strnclen)(const char *s, size_t n) {
const unsigned char *p = (const unsigned char *)s;
size_t l = 0;
noinline size_t strnclen(const char *s, size_t n) {
size_t r = 0;
if (n) {
while (*p && n && iscont(*p)) ++p, --n;
while (*p) {
if (!iscont(*p++)) l++;
while (n && *s && (*s & 0300) == 0200) ++s, --n;
while (*s) {
if ((*s++ & 0300) != 0200) r++;
if (!--n) break;
}
}
return l;
return r;
}

View file

@ -17,7 +17,10 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/intrin/pcmpeqb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/limits.h"
#include "libc/nexgen32e/bsf.h"
#include "libc/str/str.h"
/**
@ -31,6 +34,30 @@
* @asyncsignalsafe
*/
char *strcpy(char *d, const char *s) {
memccpy(d, s, '\0', SIZE_MAX);
return d;
size_t i;
uint8_t v1[16], v2[16], vz[16];
i = 0;
while (((uintptr_t)(s + i) & 15)) {
if (!(d[i] = s[i])) {
return d;
}
++i;
}
for (;;) {
memset(vz, 0, 16);
memcpy(v1, s + i, 16);
pcmpeqb(v2, v1, vz);
if (!pmovmskb(v2)) {
memcpy(d + i, v1, 16);
i += 16;
} else {
break;
}
}
for (;;) {
if (!(d[i] = s[i])) {
return d;
}
++i;
}
}

View file

@ -1,5 +1,5 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
@ -17,40 +17,34 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
#include "libc/macros.h"
.source __FILE__
#include "libc/assert.h"
#include "libc/intrin/pcmpeqb.h"
#include "libc/intrin/pmovmskb.h"
#include "libc/nexgen32e/bsf.h"
#include "libc/str/str.h"
/ Thompson-Pike Decoder Ring.
/
/ The IA-32 BSR instruction can be used to turn a 32-bit
/ number into an index for this table.
/
/ @see libc/str/internal.h
.rodata
.align 2
kTpDecoderRing:
.rept 7 # MSB6 (0x7F)
.byte 0b00000000,1 # mark,len
/ 0b11000000 # mask
.endr
.rept 4 # MSB10 (0x7FF)
.byte 0b11000000,2 # mark,len
/ 0b11100000 # mask
.endr
.rept 5 # MSB15 (0xFFFF)
.byte 0b11100000,3 # mark,len
/ 0b11110000 # mask
.endr
.rept 5 # MSB20 (0x1FFFFF)
.byte 0b11110000,4 # mark,len
/ 0b11111000 # mask
.endr
.rept 5 # MSB25 (0x3FFFFFF)
.byte 0b11111000,5 # mark,len
/ 0b11111100 # mask
.endr
.rept 6 # MSB31 (0xffffffff)
.byte 0b11111100,6 # mark,len
.endr
.endobj kTpDecoderRing,globl,hidden
.previous
/**
 * Returns length of NUL-terminated string.
 *
 * The string is examined sixteen bytes at a time, starting from the
 * 16-byte-aligned address at or below s.
 *
 * @param s is non-null NUL-terminated string pointer
 * @return number of bytes (excluding NUL)
 * @asyncsignalsafe
 */
size_t strlen(const char *s) {
  const char *chunk;
  unsigned skew, hits;
  uint8_t block[16], zeros[16];
  memset(zeros, 0, 16);
  skew = (uintptr_t)s & 15;                       /* bytes between chunk and s */
  chunk = (const char *)((uintptr_t)s & -16);     /* s rounded down to 16 */
  memcpy(block, chunk, 16);
  /* NOTE(review): pcmpeqb/pmovmskb presumably mirror the SSE2 ops of the
     same name, yielding one mask bit per byte equal to zero -- confirm */
  pcmpeqb(block, block, zeros);
  /* shifting right then left clears the bits for bytes located before s */
  for (hits = pmovmskb(block) >> skew << skew; !hits;
       hits = pmovmskb(block)) {
    chunk += 16;
    memcpy(block, chunk, 16);
    pcmpeqb(block, block, zeros);
  }
  /* NOTE(review): bsf presumably returns index of lowest set bit -- confirm */
  return chunk + bsf(hits) - s;
}

View file

@ -2,10 +2,10 @@
#define COSMOPOLITAN_LIBC_STR_THOMPIKE_H_
#include "libc/nexgen32e/bsr.h"
#define ThomPikeCont(x) (((x)&0b11000000) == 0b10000000)
#define ThomPikeCont(x) (((x)&0300) == 0200)
#define ThomPikeByte(x) ((x) & (((1 << ThomPikeMsb(x)) - 1) | 3))
#define ThomPikeLen(x) (7 - ThomPikeMsb(x))
#define ThomPikeMsb(x) (((x)&0xff) < 252 ? bsr(~(x)&0xff) : 1)
#define ThomPikeMerge(x, y) ((x) << 6 | (y)&0b00111111)
#define ThomPikeMerge(x, y) ((x) << 6 | (y)&077)
#endif /* COSMOPOLITAN_LIBC_STR_THOMPIKE_H_ */