Reduce build latency and fix old CPU bugs

This commit is contained in:
Justine Tunney 2021-08-05 14:43:53 -07:00
parent df8ab0aa0c
commit 533f3d1ef1
69 changed files with 43069 additions and 43683 deletions

View file

@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/bits/likely.h"
#include "libc/fmt/fmt.h"
#include "libc/limits.h"
#include "libc/mem/alloca.h"
@ -133,7 +134,7 @@ static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
uint32_t res[LRE_CC_RES_LEN_MAX];
int len;
if (is_utf16) {
if (likely(c < 128)) {
if (LIKELY(c < 128)) {
if (c >= 'A' && c <= 'Z')
c = c - 'A' + 'a';
} else {
@ -141,7 +142,7 @@ static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
c = res[0];
}
} else {
if (likely(c < 128)) {
if (LIKELY(c < 128)) {
if (c >= 'a' && c <= 'z')
c = c - 'a' + 'A';
} else {
@ -233,7 +234,7 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
BOOL invert;
const uint16_t *c_pt;
int len, i;
invert = c & 1;
c_pt = char_range_table[c >> 1];
len = *c_pt++;
@ -283,7 +284,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
{
int pos, len, opcode, bc_len, re_flags, i;
uint32_t val;
assert(buf_len >= RE_HEADER_LEN);
re_flags= buf[0];
@ -455,7 +456,7 @@ static int parse_digits(const uint8_t **pp, BOOL allow_overflow)
const uint8_t *p;
uint64_t v;
int c;
p = *pp;
v = 0;
for(;;) {
@ -500,7 +501,6 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
{
const uint8_t *p;
uint32_t c;
p = *pp;
c = *p++;
switch(c) {
@ -527,7 +527,7 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
{
int h, n, i;
uint32_t c1;
if (*p == '{' && allow_utf16) {
p++;
c = 0;
@ -548,7 +548,6 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
} else {
n = 4;
}
c = 0;
for(i = 0; i < n; i++) {
h = from_hex(*p++);
@ -721,7 +720,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
const uint8_t *p;
uint32_t c;
int ret;
p = *pp;
c = *p;
@ -829,7 +828,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
{
int len, i;
uint32_t high;
len = (unsigned)cr->len / 2;
if (len >= 65535)
return re_parse_error(s, "too many ranges");
@ -870,7 +869,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
CharRange cr_s, *cr = &cr_s;
CharRange cr1_s, *cr1 = &cr1_s;
BOOL invert;
cr_init(cr, s->opaque, lre_realloc);
p = *pp;
p++; /* skip '[' */
@ -960,12 +959,12 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
uint32_t val, last;
BOOL has_back_reference;
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
ret = -2; /* not known yet */
pos = 0;
has_back_reference = FALSE;
memset(capture_bitmap, 0, sizeof(capture_bitmap));
while (pos < bc_buf_len) {
opcode = bc_buf[pos];
len = reopcode_info[opcode].size;
@ -1042,7 +1041,7 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
{
int pos, opcode, len, count;
uint32_t val;
count = 0;
pos = 0;
while (pos < bc_buf_len) {
@ -1202,7 +1201,7 @@ static int find_group_name(REParseState *s, const char *name)
const char *p, *buf_end;
size_t len, name_len;
int capture_index;
name_len = strlen(name);
p = (char *)s->group_names.buf;
buf_end = (char *)s->group_names.buf + s->group_names.size;
@ -1225,7 +1224,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
int c, last_atom_start, quant_min, quant_max, last_capture_count;
BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead;
CharRange cr_s, *cr = &cr_s;
last_atom_start = -1;
last_capture_count = 0;
p = s->buf_ptr;
@ -1348,15 +1347,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
capture_index = s->capture_count++;
re_emit_op_u8(s, REOP_save_start + is_backward_dir,
capture_index);
s->buf_ptr = p;
if (re_parse_disjunction(s, is_backward_dir))
return -1;
p = s->buf_ptr;
re_emit_op_u8(s, REOP_save_start + 1 - is_backward_dir,
capture_index);
if (re_parse_expect(s, &p, ')'))
return -1;
}
@ -1372,7 +1371,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
{
const uint8_t *p1;
int dummy_res;
p1 = p;
if (p1[2] != '<') {
/* annex B: we tolerate invalid group names in non
@ -1425,10 +1424,10 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
goto normal_char;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8':
case '9':
case '9':
{
const uint8_t *q = ++p;
c = parse_digits(&p, FALSE);
if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) {
if (!s->is_utf16) {
@ -1569,7 +1568,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
if (greedy) {
int len, pos;
if (quant_max > 0) {
/* specific optimization for simple quantifiers */
if (dbuf_error(&s->byte_code))
@ -1578,7 +1577,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
s->byte_code.size - last_atom_start);
if (len > 0) {
re_emit_op(s, REOP_match);
if (dbuf_insert(&s->byte_code, last_atom_start, 17))
goto out_of_memory;
pos = last_atom_start;
@ -1595,7 +1594,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
goto done;
}
}
if (dbuf_error(&s->byte_code))
goto out_of_memory;
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
@ -1603,7 +1602,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
} else {
add_zero_advance_check = FALSE;
}
{
int len, pos;
len = s->byte_code.size - last_atom_start;
@ -1638,7 +1637,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
atom (only works if the atom has no
side effect) */
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
} else {
re_emit_goto(s, REOP_goto, last_atom_start);
}
@ -1689,7 +1688,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
/* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len);
re_emit_goto(s, REOP_loop, pos);
re_emit_op(s, REOP_drop);
}
@ -1741,14 +1740,14 @@ static int re_parse_alternative(REParseState *s, BOOL is_backward_dir)
}
return 0;
}
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
{
int start, len, pos;
if (lre_check_stack_overflow(s->opaque, 0))
return re_parse_error(s, "stack overflow");
start = s->byte_code.size;
if (re_parse_alternative(s, is_backward_dir))
return -1;
@ -1768,7 +1767,7 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
if (re_parse_alternative(s, is_backward_dir))
return -1;
/* patch the goto */
len = s->byte_code.size - (pos + 4);
put_u32(s->byte_code.buf + pos, len);
@ -1781,7 +1780,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
{
int stack_size, stack_size_max, pos, opcode, len;
uint32_t val;
stack_size = 0;
stack_size_max = 0;
bc_buf += RE_HEADER_LEN;
@ -1832,7 +1831,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
REParseState s_s, *s = &s_s;
int stack_size;
BOOL is_sticky;
memset(s, 0, sizeof(*s));
s->opaque = opaque;
s->buf_ptr = (const uint8_t *)buf;
@ -1846,7 +1845,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
s->capture_count = 1;
s->total_capture_count = -1;
s->has_named_captures = -1;
dbuf_init2(&s->byte_code, opaque, lre_realloc);
dbuf_init2(&s->group_names, opaque, lre_realloc);
@ -1854,7 +1853,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */
dbuf_putc(&s->byte_code, 0); /* stack size */
dbuf_put_u32(&s->byte_code, 0); /* bytecode length */
if (!is_sticky) {
/* iterate thru all positions (about the same as .*?( ... ) )
. We do it without an explicit loop so that lock step
@ -1876,7 +1875,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
}
re_emit_op_u8(s, REOP_save_end, 0);
re_emit_op(s, REOP_match);
if (*s->buf_ptr != '\0') {
@ -1888,13 +1887,13 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
re_parse_out_of_memory(s);
goto error;
}
stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size);
if (stack_size < 0) {
re_parse_error(s, "too many imbricated quantifiers");
goto error;
}
s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size;
put_u32(s->byte_code.buf + 3, s->byte_code.size - RE_HEADER_LEN);
@ -1905,11 +1904,11 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS;
}
dbuf_free(&s->group_names);
#ifdef DUMP_REOP
lre_dump_bytecode(s->byte_code.buf, s->byte_code.size);
#endif
error_msg[0] = '\0';
*plen = s->byte_code.size;
return s->byte_code.buf;
@ -2040,7 +2039,7 @@ typedef struct {
const uint8_t *cbuf;
const uint8_t *cbuf_end;
/* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
int cbuf_type;
int cbuf_type;
int capture_count;
int stack_size_max;
BOOL multi_line;
@ -2065,7 +2064,7 @@ static int push_state(REExecContext *s,
size_t new_size, i, n;
StackInt *stack_buf;
if (unlikely((s->state_stack_len + 1) > s->state_stack_size)) {
if (UNLIKELY((s->state_stack_len + 1) > s->state_stack_size)) {
/* reallocate the stack */
new_size = s->state_stack_size * 3 / 2;
if (new_size < 8)
@ -2102,7 +2101,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
int cbuf_type;
uint32_t val, c;
const uint8_t *cbuf_end;
cbuf_type = s->cbuf_type;
cbuf_end = s->cbuf_end;
@ -2200,7 +2199,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
case REOP_split_next_first:
{
const uint8_t *pc1;
val = get_u32(pc);
pc += 4;
if (opcode == REOP_split_next_first) {
@ -2226,7 +2225,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
if (ret < 0)
return -1;
break;
case REOP_goto:
val = get_u32(pc);
pc += 4 + (int)val;
@ -2332,7 +2331,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
{
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
uint32_t c1, c2;
val = *pc++;
if (val >= s->capture_count)
goto no_match;
@ -2375,7 +2374,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
{
int n;
uint32_t low, high, idx_min, idx_max, idx;
n = get_u16(pc); /* n must be >= 1 */
pc += 2;
if (cptr >= cbuf_end)
@ -2391,7 +2390,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
idx_max = n - 1;
high = get_u16(pc + idx_max * 4 + 2);
/* 0xffff in for last value means +infinity */
if (unlikely(c >= 0xffff) && high == 0xffff)
if (UNLIKELY(c >= 0xffff) && high == 0xffff)
goto range_match;
if (c > high)
goto no_match;
@ -2415,7 +2414,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
{
int n;
uint32_t low, high, idx_min, idx_max, idx;
n = get_u16(pc); /* n must be >= 1 */
pc += 2;
if (cptr >= cbuf_end)
@ -2460,14 +2459,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
size_t q;
intptr_t res;
const uint8_t *pc1;
next_pos = get_u32(pc);
quant_min = get_u32(pc + 4);
quant_max = get_u32(pc + 8);
pc += 16;
pc1 = pc;
pc += (int)next_pos;
q = 0;
for(;;) {
res = lre_exec_backtrack(s, capture, stack, stack_len,
@ -2510,7 +2509,7 @@ int lre_exec(uint8_t **capture,
REExecContext s_s, *s = &s_s;
int re_flags, i, alloca_size, ret;
StackInt *stack_buf;
re_flags = bc_buf[RE_HEADER_FLAGS];
s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0;
s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0;
@ -2530,7 +2529,7 @@ int lre_exec(uint8_t **capture,
s->state_stack = NULL;
s->state_stack_len = 0;
s->state_stack_size = 0;
for(i = 0; i < s->capture_count * 2; i++)
capture[i] = NULL;
alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
@ -2582,7 +2581,7 @@ int main(int argc, char **argv)
uint8_t *capture[CAPTURE_COUNT_MAX * 2];
const char *input;
int input_len, capture_count;
if (argc < 3) {
printf("usage: %s regexp input\n", argv[0]);
exit(1);
@ -2596,7 +2595,7 @@ int main(int argc, char **argv)
input = argv[2];
input_len = strlen(input);
ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);
printf("ret=%d\n", ret);
if (ret == 1) {