mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-01 03:53:33 +00:00
9b29358511
Status lines for Emacs and Vim have been added to Python sources so they'll be easier to edit using Python's preferred coding style. Some DNS helper functions have been broken up into multiple files. It's nice to have one function per file whenever possible, since that way we don't need -ffunction-sections. Another reason it's good to have small source files, is because the build will be enforcing resource limits on compilation and testing soon.
258 lines
8.4 KiB
C++
258 lines
8.4 KiB
C++
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Python 3 │
|
|
│ https://docs.python.org/3/license.html │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
/* clang-format off */
|
|
|
|
/* stringlib: fastsearch implementation */
|
|
|
|
#define STRINGLIB_FASTSEARCH_H
|
|
|
|
/* fast search/count implementation, based on a mix between boyer-
|
|
moore and horspool, with a few more bells and whistles on the top.
|
|
for some more background, see: http://effbot.org/zone/stringlib.htm */
|
|
|
|
/* note: fastsearch may access s[n], which isn't a problem when using
|
|
Python's ordinary string types, but may cause problems if you're
|
|
using this code in other contexts. also, the count mode returns -1
|
|
if there cannot possible be a match in the target string, and 0 if
|
|
it has actually checked for matches, but didn't find any. callers
|
|
beware! */
|
|
|
|
#define FAST_COUNT 0
|
|
#define FAST_SEARCH 1
|
|
#define FAST_RSEARCH 2
|
|
|
|
#if LONG_BIT >= 128
|
|
#define STRINGLIB_BLOOM_WIDTH 128
|
|
#elif LONG_BIT >= 64
|
|
#define STRINGLIB_BLOOM_WIDTH 64
|
|
#elif LONG_BIT >= 32
|
|
#define STRINGLIB_BLOOM_WIDTH 32
|
|
#else
|
|
#error "LONG_BIT is smaller than 32"
|
|
#endif
|
|
|
|
#define STRINGLIB_BLOOM_ADD(mask, ch) \
|
|
((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
|
#define STRINGLIB_BLOOM(mask, ch) \
|
|
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
|
|
|
|
Py_LOCAL_INLINE(Py_ssize_t)
|
|
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
|
{
|
|
const STRINGLIB_CHAR *p, *e;
|
|
|
|
p = s;
|
|
e = s + n;
|
|
if (n > 10) {
|
|
#if STRINGLIB_SIZEOF_CHAR == 1
|
|
p = memchr(s, ch, n);
|
|
if (p != NULL)
|
|
return (p - s);
|
|
return -1;
|
|
#else
|
|
/* use memchr if we can choose a needle without two many likely
|
|
false positives */
|
|
unsigned char needle = ch & 0xff;
|
|
/* If looking for a multiple of 256, we'd have too
|
|
many false positives looking for the '\0' byte in UCS2
|
|
and UCS4 representations. */
|
|
if (needle != 0) {
|
|
while (p < e) {
|
|
void *candidate = memchr(p, needle,
|
|
(e - p) * sizeof(STRINGLIB_CHAR));
|
|
if (candidate == NULL)
|
|
return -1;
|
|
p = (const STRINGLIB_CHAR *)
|
|
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
|
if (*p == ch)
|
|
return (p - s);
|
|
/* False positive */
|
|
p++;
|
|
}
|
|
return -1;
|
|
}
|
|
#endif
|
|
}
|
|
while (p < e) {
|
|
if (*p == ch)
|
|
return (p - s);
|
|
p++;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(Py_ssize_t)
|
|
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
|
|
{
|
|
const STRINGLIB_CHAR *p;
|
|
#ifdef HAVE_MEMRCHR
|
|
/* memrchr() is a GNU extension, available since glibc 2.1.91.
|
|
it doesn't seem as optimized as memchr(), but is still quite
|
|
faster than our hand-written loop below */
|
|
|
|
if (n > 10) {
|
|
#if STRINGLIB_SIZEOF_CHAR == 1
|
|
p = memrchr(s, ch, n);
|
|
if (p != NULL)
|
|
return (p - s);
|
|
return -1;
|
|
#else
|
|
/* use memrchr if we can choose a needle without two many likely
|
|
false positives */
|
|
unsigned char needle = ch & 0xff;
|
|
/* If looking for a multiple of 256, we'd have too
|
|
many false positives looking for the '\0' byte in UCS2
|
|
and UCS4 representations. */
|
|
if (needle != 0) {
|
|
while (n > 0) {
|
|
void *candidate = memrchr(s, needle,
|
|
n * sizeof(STRINGLIB_CHAR));
|
|
if (candidate == NULL)
|
|
return -1;
|
|
p = (const STRINGLIB_CHAR *)
|
|
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
|
|
n = p - s;
|
|
if (*p == ch)
|
|
return n;
|
|
/* False positive */
|
|
}
|
|
return -1;
|
|
}
|
|
#endif
|
|
}
|
|
#endif /* HAVE_MEMRCHR */
|
|
p = s + n;
|
|
while (p > s) {
|
|
p--;
|
|
if (*p == ch)
|
|
return (p - s);
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
Py_LOCAL_INLINE(Py_ssize_t)
|
|
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
|
|
const STRINGLIB_CHAR* p, Py_ssize_t m,
|
|
Py_ssize_t maxcount, int mode)
|
|
{
|
|
unsigned long mask;
|
|
Py_ssize_t skip, count = 0;
|
|
Py_ssize_t i, j, mlast, w;
|
|
|
|
w = n - m;
|
|
|
|
if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
|
|
return -1;
|
|
|
|
/* look for special cases */
|
|
if (m <= 1) {
|
|
if (m <= 0)
|
|
return -1;
|
|
/* use special case for 1-character strings */
|
|
if (mode == FAST_SEARCH)
|
|
return STRINGLIB(find_char)(s, n, p[0]);
|
|
else if (mode == FAST_RSEARCH)
|
|
return STRINGLIB(rfind_char)(s, n, p[0]);
|
|
else { /* FAST_COUNT */
|
|
for (i = 0; i < n; i++)
|
|
if (s[i] == p[0]) {
|
|
count++;
|
|
if (count == maxcount)
|
|
return maxcount;
|
|
}
|
|
return count;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
mlast = m - 1;
|
|
skip = mlast - 1;
|
|
mask = 0;
|
|
|
|
if (mode != FAST_RSEARCH) {
|
|
const STRINGLIB_CHAR *ss = s + m - 1;
|
|
const STRINGLIB_CHAR *pp = p + m - 1;
|
|
|
|
/* create compressed boyer-moore delta 1 table */
|
|
|
|
/* process pattern[:-1] */
|
|
for (i = 0; i < mlast; i++) {
|
|
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
|
if (p[i] == p[mlast])
|
|
skip = mlast - i - 1;
|
|
}
|
|
/* process pattern[-1] outside the loop */
|
|
STRINGLIB_BLOOM_ADD(mask, p[mlast]);
|
|
|
|
for (i = 0; i <= w; i++) {
|
|
/* note: using mlast in the skip path slows things down on x86 */
|
|
if (ss[i] == pp[0]) {
|
|
/* candidate match */
|
|
for (j = 0; j < mlast; j++)
|
|
if (s[i+j] != p[j])
|
|
break;
|
|
if (j == mlast) {
|
|
/* got a match! */
|
|
if (mode != FAST_COUNT)
|
|
return i;
|
|
count++;
|
|
if (count == maxcount)
|
|
return maxcount;
|
|
i = i + mlast;
|
|
continue;
|
|
}
|
|
/* miss: check if next character is part of pattern */
|
|
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
|
|
i = i + m;
|
|
else
|
|
i = i + skip;
|
|
} else {
|
|
/* skip: check if next character is part of pattern */
|
|
if (!STRINGLIB_BLOOM(mask, ss[i+1]))
|
|
i = i + m;
|
|
}
|
|
}
|
|
} else { /* FAST_RSEARCH */
|
|
|
|
/* create compressed boyer-moore delta 1 table */
|
|
|
|
/* process pattern[0] outside the loop */
|
|
STRINGLIB_BLOOM_ADD(mask, p[0]);
|
|
/* process pattern[:0:-1] */
|
|
for (i = mlast; i > 0; i--) {
|
|
STRINGLIB_BLOOM_ADD(mask, p[i]);
|
|
if (p[i] == p[0])
|
|
skip = i - 1;
|
|
}
|
|
|
|
for (i = w; i >= 0; i--) {
|
|
if (s[i] == p[0]) {
|
|
/* candidate match */
|
|
for (j = mlast; j > 0; j--)
|
|
if (s[i+j] != p[j])
|
|
break;
|
|
if (j == 0)
|
|
/* got a match! */
|
|
return i;
|
|
/* miss: check if previous character is part of pattern */
|
|
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
|
|
i = i - m;
|
|
else
|
|
i = i - skip;
|
|
} else {
|
|
/* skip: check if previous character is part of pattern */
|
|
if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
|
|
i = i - m;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (mode != FAST_COUNT)
|
|
return -1;
|
|
return count;
|
|
}
|
|
|