mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
Reduce build latency and fix old cpu bugs
This commit is contained in:
parent
df8ab0aa0c
commit
533f3d1ef1
69 changed files with 43069 additions and 43683 deletions
109
third_party/quickjs/libregexp.c
vendored
109
third_party/quickjs/libregexp.c
vendored
|
@ -22,6 +22,7 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/likely.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/mem/alloca.h"
|
||||
|
@ -133,7 +134,7 @@ static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
|
|||
uint32_t res[LRE_CC_RES_LEN_MAX];
|
||||
int len;
|
||||
if (is_utf16) {
|
||||
if (likely(c < 128)) {
|
||||
if (LIKELY(c < 128)) {
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
c = c - 'A' + 'a';
|
||||
} else {
|
||||
|
@ -141,7 +142,7 @@ static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
|
|||
c = res[0];
|
||||
}
|
||||
} else {
|
||||
if (likely(c < 128)) {
|
||||
if (LIKELY(c < 128)) {
|
||||
if (c >= 'a' && c <= 'z')
|
||||
c = c - 'a' + 'A';
|
||||
} else {
|
||||
|
@ -233,7 +234,7 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
|
|||
BOOL invert;
|
||||
const uint16_t *c_pt;
|
||||
int len, i;
|
||||
|
||||
|
||||
invert = c & 1;
|
||||
c_pt = char_range_table[c >> 1];
|
||||
len = *c_pt++;
|
||||
|
@ -283,7 +284,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|||
{
|
||||
int pos, len, opcode, bc_len, re_flags, i;
|
||||
uint32_t val;
|
||||
|
||||
|
||||
assert(buf_len >= RE_HEADER_LEN);
|
||||
|
||||
re_flags= buf[0];
|
||||
|
@ -455,7 +456,7 @@ static int parse_digits(const uint8_t **pp, BOOL allow_overflow)
|
|||
const uint8_t *p;
|
||||
uint64_t v;
|
||||
int c;
|
||||
|
||||
|
||||
p = *pp;
|
||||
v = 0;
|
||||
for(;;) {
|
||||
|
@ -500,7 +501,6 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
|
|||
{
|
||||
const uint8_t *p;
|
||||
uint32_t c;
|
||||
|
||||
p = *pp;
|
||||
c = *p++;
|
||||
switch(c) {
|
||||
|
@ -527,7 +527,7 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
|
|||
{
|
||||
int h, n, i;
|
||||
uint32_t c1;
|
||||
|
||||
|
||||
if (*p == '{' && allow_utf16) {
|
||||
p++;
|
||||
c = 0;
|
||||
|
@ -548,7 +548,6 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
|
|||
} else {
|
||||
n = 4;
|
||||
}
|
||||
|
||||
c = 0;
|
||||
for(i = 0; i < n; i++) {
|
||||
h = from_hex(*p++);
|
||||
|
@ -721,7 +720,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
|
|||
const uint8_t *p;
|
||||
uint32_t c;
|
||||
int ret;
|
||||
|
||||
|
||||
p = *pp;
|
||||
|
||||
c = *p;
|
||||
|
@ -829,7 +828,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
|
|||
{
|
||||
int len, i;
|
||||
uint32_t high;
|
||||
|
||||
|
||||
len = (unsigned)cr->len / 2;
|
||||
if (len >= 65535)
|
||||
return re_parse_error(s, "too many ranges");
|
||||
|
@ -870,7 +869,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
|
|||
CharRange cr_s, *cr = &cr_s;
|
||||
CharRange cr1_s, *cr1 = &cr1_s;
|
||||
BOOL invert;
|
||||
|
||||
|
||||
cr_init(cr, s->opaque, lre_realloc);
|
||||
p = *pp;
|
||||
p++; /* skip '[' */
|
||||
|
@ -960,12 +959,12 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|||
uint32_t val, last;
|
||||
BOOL has_back_reference;
|
||||
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
|
||||
|
||||
|
||||
ret = -2; /* not known yet */
|
||||
pos = 0;
|
||||
has_back_reference = FALSE;
|
||||
memset(capture_bitmap, 0, sizeof(capture_bitmap));
|
||||
|
||||
|
||||
while (pos < bc_buf_len) {
|
||||
opcode = bc_buf[pos];
|
||||
len = reopcode_info[opcode].size;
|
||||
|
@ -1042,7 +1041,7 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
|
|||
{
|
||||
int pos, opcode, len, count;
|
||||
uint32_t val;
|
||||
|
||||
|
||||
count = 0;
|
||||
pos = 0;
|
||||
while (pos < bc_buf_len) {
|
||||
|
@ -1202,7 +1201,7 @@ static int find_group_name(REParseState *s, const char *name)
|
|||
const char *p, *buf_end;
|
||||
size_t len, name_len;
|
||||
int capture_index;
|
||||
|
||||
|
||||
name_len = strlen(name);
|
||||
p = (char *)s->group_names.buf;
|
||||
buf_end = (char *)s->group_names.buf + s->group_names.size;
|
||||
|
@ -1225,7 +1224,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
int c, last_atom_start, quant_min, quant_max, last_capture_count;
|
||||
BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead;
|
||||
CharRange cr_s, *cr = &cr_s;
|
||||
|
||||
|
||||
last_atom_start = -1;
|
||||
last_capture_count = 0;
|
||||
p = s->buf_ptr;
|
||||
|
@ -1348,15 +1347,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
capture_index = s->capture_count++;
|
||||
re_emit_op_u8(s, REOP_save_start + is_backward_dir,
|
||||
capture_index);
|
||||
|
||||
|
||||
s->buf_ptr = p;
|
||||
if (re_parse_disjunction(s, is_backward_dir))
|
||||
return -1;
|
||||
p = s->buf_ptr;
|
||||
|
||||
|
||||
re_emit_op_u8(s, REOP_save_start + 1 - is_backward_dir,
|
||||
capture_index);
|
||||
|
||||
|
||||
if (re_parse_expect(s, &p, ')'))
|
||||
return -1;
|
||||
}
|
||||
|
@ -1372,7 +1371,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
{
|
||||
const uint8_t *p1;
|
||||
int dummy_res;
|
||||
|
||||
|
||||
p1 = p;
|
||||
if (p1[2] != '<') {
|
||||
/* annex B: we tolerate invalid group names in non
|
||||
|
@ -1425,10 +1424,10 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
goto normal_char;
|
||||
case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8':
|
||||
case '9':
|
||||
case '9':
|
||||
{
|
||||
const uint8_t *q = ++p;
|
||||
|
||||
|
||||
c = parse_digits(&p, FALSE);
|
||||
if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) {
|
||||
if (!s->is_utf16) {
|
||||
|
@ -1569,7 +1568,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
}
|
||||
if (greedy) {
|
||||
int len, pos;
|
||||
|
||||
|
||||
if (quant_max > 0) {
|
||||
/* specific optimization for simple quantifiers */
|
||||
if (dbuf_error(&s->byte_code))
|
||||
|
@ -1578,7 +1577,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
s->byte_code.size - last_atom_start);
|
||||
if (len > 0) {
|
||||
re_emit_op(s, REOP_match);
|
||||
|
||||
|
||||
if (dbuf_insert(&s->byte_code, last_atom_start, 17))
|
||||
goto out_of_memory;
|
||||
pos = last_atom_start;
|
||||
|
@ -1595,7 +1594,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (dbuf_error(&s->byte_code))
|
||||
goto out_of_memory;
|
||||
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
|
||||
|
@ -1603,7 +1602,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
} else {
|
||||
add_zero_advance_check = FALSE;
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
int len, pos;
|
||||
len = s->byte_code.size - last_atom_start;
|
||||
|
@ -1638,7 +1637,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
atom (only works if the atom has no
|
||||
side effect) */
|
||||
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
|
||||
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
|
||||
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
|
||||
} else {
|
||||
re_emit_goto(s, REOP_goto, last_atom_start);
|
||||
}
|
||||
|
@ -1689,7 +1688,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
|
||||
/* copy the atom */
|
||||
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
||||
|
||||
|
||||
re_emit_goto(s, REOP_loop, pos);
|
||||
re_emit_op(s, REOP_drop);
|
||||
}
|
||||
|
@ -1741,14 +1740,14 @@ static int re_parse_alternative(REParseState *s, BOOL is_backward_dir)
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
||||
{
|
||||
int start, len, pos;
|
||||
|
||||
if (lre_check_stack_overflow(s->opaque, 0))
|
||||
return re_parse_error(s, "stack overflow");
|
||||
|
||||
|
||||
start = s->byte_code.size;
|
||||
if (re_parse_alternative(s, is_backward_dir))
|
||||
return -1;
|
||||
|
@ -1768,7 +1767,7 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
|
|||
|
||||
if (re_parse_alternative(s, is_backward_dir))
|
||||
return -1;
|
||||
|
||||
|
||||
/* patch the goto */
|
||||
len = s->byte_code.size - (pos + 4);
|
||||
put_u32(s->byte_code.buf + pos, len);
|
||||
|
@ -1781,7 +1780,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
|
|||
{
|
||||
int stack_size, stack_size_max, pos, opcode, len;
|
||||
uint32_t val;
|
||||
|
||||
|
||||
stack_size = 0;
|
||||
stack_size_max = 0;
|
||||
bc_buf += RE_HEADER_LEN;
|
||||
|
@ -1832,7 +1831,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
REParseState s_s, *s = &s_s;
|
||||
int stack_size;
|
||||
BOOL is_sticky;
|
||||
|
||||
|
||||
memset(s, 0, sizeof(*s));
|
||||
s->opaque = opaque;
|
||||
s->buf_ptr = (const uint8_t *)buf;
|
||||
|
@ -1846,7 +1845,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
s->capture_count = 1;
|
||||
s->total_capture_count = -1;
|
||||
s->has_named_captures = -1;
|
||||
|
||||
|
||||
dbuf_init2(&s->byte_code, opaque, lre_realloc);
|
||||
dbuf_init2(&s->group_names, opaque, lre_realloc);
|
||||
|
||||
|
@ -1854,7 +1853,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */
|
||||
dbuf_putc(&s->byte_code, 0); /* stack size */
|
||||
dbuf_put_u32(&s->byte_code, 0); /* bytecode length */
|
||||
|
||||
|
||||
if (!is_sticky) {
|
||||
/* iterate through all positions (about the same as .*?( ... ) )
|
||||
. We do it without an explicit loop so that lock step
|
||||
|
@ -1876,7 +1875,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
}
|
||||
|
||||
re_emit_op_u8(s, REOP_save_end, 0);
|
||||
|
||||
|
||||
re_emit_op(s, REOP_match);
|
||||
|
||||
if (*s->buf_ptr != '\0') {
|
||||
|
@ -1888,13 +1887,13 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
re_parse_out_of_memory(s);
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size);
|
||||
if (stack_size < 0) {
|
||||
re_parse_error(s, "too many imbricated quantifiers");
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
|
||||
s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size;
|
||||
put_u32(s->byte_code.buf + 3, s->byte_code.size - RE_HEADER_LEN);
|
||||
|
@ -1905,11 +1904,11 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|||
s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS;
|
||||
}
|
||||
dbuf_free(&s->group_names);
|
||||
|
||||
|
||||
#ifdef DUMP_REOP
|
||||
lre_dump_bytecode(s->byte_code.buf, s->byte_code.size);
|
||||
#endif
|
||||
|
||||
|
||||
error_msg[0] = '\0';
|
||||
*plen = s->byte_code.size;
|
||||
return s->byte_code.buf;
|
||||
|
@ -2040,7 +2039,7 @@ typedef struct {
|
|||
const uint8_t *cbuf;
|
||||
const uint8_t *cbuf_end;
|
||||
/* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
|
||||
int cbuf_type;
|
||||
int cbuf_type;
|
||||
int capture_count;
|
||||
int stack_size_max;
|
||||
BOOL multi_line;
|
||||
|
@ -2065,7 +2064,7 @@ static int push_state(REExecContext *s,
|
|||
size_t new_size, i, n;
|
||||
StackInt *stack_buf;
|
||||
|
||||
if (unlikely((s->state_stack_len + 1) > s->state_stack_size)) {
|
||||
if (UNLIKELY((s->state_stack_len + 1) > s->state_stack_size)) {
|
||||
/* reallocate the stack */
|
||||
new_size = s->state_stack_size * 3 / 2;
|
||||
if (new_size < 8)
|
||||
|
@ -2102,7 +2101,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
int cbuf_type;
|
||||
uint32_t val, c;
|
||||
const uint8_t *cbuf_end;
|
||||
|
||||
|
||||
cbuf_type = s->cbuf_type;
|
||||
cbuf_end = s->cbuf_end;
|
||||
|
||||
|
@ -2200,7 +2199,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
case REOP_split_next_first:
|
||||
{
|
||||
const uint8_t *pc1;
|
||||
|
||||
|
||||
val = get_u32(pc);
|
||||
pc += 4;
|
||||
if (opcode == REOP_split_next_first) {
|
||||
|
@ -2226,7 +2225,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
if (ret < 0)
|
||||
return -1;
|
||||
break;
|
||||
|
||||
|
||||
case REOP_goto:
|
||||
val = get_u32(pc);
|
||||
pc += 4 + (int)val;
|
||||
|
@ -2332,7 +2331,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
{
|
||||
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
|
||||
uint32_t c1, c2;
|
||||
|
||||
|
||||
val = *pc++;
|
||||
if (val >= s->capture_count)
|
||||
goto no_match;
|
||||
|
@ -2375,7 +2374,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
{
|
||||
int n;
|
||||
uint32_t low, high, idx_min, idx_max, idx;
|
||||
|
||||
|
||||
n = get_u16(pc); /* n must be >= 1 */
|
||||
pc += 2;
|
||||
if (cptr >= cbuf_end)
|
||||
|
@ -2391,7 +2390,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
idx_max = n - 1;
|
||||
high = get_u16(pc + idx_max * 4 + 2);
|
||||
/* 0xffff as the last value means +infinity */
|
||||
if (unlikely(c >= 0xffff) && high == 0xffff)
|
||||
if (UNLIKELY(c >= 0xffff) && high == 0xffff)
|
||||
goto range_match;
|
||||
if (c > high)
|
||||
goto no_match;
|
||||
|
@ -2415,7 +2414,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
{
|
||||
int n;
|
||||
uint32_t low, high, idx_min, idx_max, idx;
|
||||
|
||||
|
||||
n = get_u16(pc); /* n must be >= 1 */
|
||||
pc += 2;
|
||||
if (cptr >= cbuf_end)
|
||||
|
@ -2460,14 +2459,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|||
size_t q;
|
||||
intptr_t res;
|
||||
const uint8_t *pc1;
|
||||
|
||||
|
||||
next_pos = get_u32(pc);
|
||||
quant_min = get_u32(pc + 4);
|
||||
quant_max = get_u32(pc + 8);
|
||||
pc += 16;
|
||||
pc1 = pc;
|
||||
pc += (int)next_pos;
|
||||
|
||||
|
||||
q = 0;
|
||||
for(;;) {
|
||||
res = lre_exec_backtrack(s, capture, stack, stack_len,
|
||||
|
@ -2510,7 +2509,7 @@ int lre_exec(uint8_t **capture,
|
|||
REExecContext s_s, *s = &s_s;
|
||||
int re_flags, i, alloca_size, ret;
|
||||
StackInt *stack_buf;
|
||||
|
||||
|
||||
re_flags = bc_buf[RE_HEADER_FLAGS];
|
||||
s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0;
|
||||
s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0;
|
||||
|
@ -2530,7 +2529,7 @@ int lre_exec(uint8_t **capture,
|
|||
s->state_stack = NULL;
|
||||
s->state_stack_len = 0;
|
||||
s->state_stack_size = 0;
|
||||
|
||||
|
||||
for(i = 0; i < s->capture_count * 2; i++)
|
||||
capture[i] = NULL;
|
||||
alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
|
||||
|
@ -2582,7 +2581,7 @@ int main(int argc, char **argv)
|
|||
uint8_t *capture[CAPTURE_COUNT_MAX * 2];
|
||||
const char *input;
|
||||
int input_len, capture_count;
|
||||
|
||||
|
||||
if (argc < 3) {
|
||||
printf("usage: %s regexp input\n", argv[0]);
|
||||
exit(1);
|
||||
|
@ -2596,7 +2595,7 @@ int main(int argc, char **argv)
|
|||
|
||||
input = argv[2];
|
||||
input_len = strlen(input);
|
||||
|
||||
|
||||
ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);
|
||||
printf("ret=%d\n", ret);
|
||||
if (ret == 1) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue