mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-26 15:59:04 +00:00
- 10.5% reduction of o//depend dependency graph - 8.8% reduction in latency of make command - Fix issue with temporary file cleanup There's a new -w option in compile.com that turns off the recent Landlock output path workaround for "good commands" which do not unlink() the output file like GNU tooling does. Our new GNU Make unveil sandboxing appears to have zero overhead in the grand scheme of things. Full builds are pretty fast since the only thing that's actually slowed us down is probably libcxx make -j16 MODE=rel RL: took 85,732,063µs wall time RL: ballooned to 323,612kb in size RL: needed 828,560,521µs cpu (11% kernel) RL: caused 39,080,670 page faults (99% memcpy) RL: 350,073 context switches (72% consensual) RL: performed 0 reads and 11,494,960 write i/o operations pledge() and unveil() no longer consider ENOSYS to be an error. These functions have also been added to Python's cosmo module. This change also removes some WIN32 APIs and System Five magnums which we're not using and it's doubtful anyone else would be too
2620 lines
83 KiB
C
2620 lines
83 KiB
C
/*
|
|
* Regular Expression Engine
|
|
*
|
|
* Copyright (c) 2017-2018 Fabrice Bellard
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
#include "libc/assert.h"
|
|
#include "libc/bits/likely.h"
|
|
#include "libc/fmt/fmt.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/mem/alloca.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/stdio/stdio.h"
|
|
#include "libc/str/str.h"
|
|
#include "third_party/quickjs/cutils.h"
|
|
#include "third_party/quickjs/libregexp.h"
|
|
|
|
asm(".ident\t\"\\n\\n\
|
|
QuickJS (MIT License)\\n\
|
|
Copyright (c) 2017-2021 Fabrice Bellard\\n\
|
|
Copyright (c) 2017-2021 Charlie Gordon\"");
|
|
asm(".include \"libc/disclaimer.inc\"");
|
|
|
|
/* clang-format off */
|
|
|
|
/*
  TODO:

  - Add the full unicode canonicalization rules for character ranges
    (not really useful, but needed for exact "ignorecase" compatibility).

  - Add a lock-step execution mode (i.e. guaranteed linear time
    execution) for "simple" regular expressions, meaning those without
    back references or complicated lookaheads. The opcodes are designed
    for this execution model.
*/
|
|
|
|
#if defined(TEST)
|
|
#define DUMP_REOP
|
|
#endif
|
|
|
|
/* Opcode numbers of the regexp bytecode. One REOP_<id> enumerator is
   generated for each DEF(id, size) entry of libregexp-opcode.inc, in
   file order; REOP_COUNT is therefore the number of opcodes. */
typedef enum {
#define DEF(id, size) REOP_ ## id,
#include "third_party/quickjs/libregexp-opcode.inc"
#undef DEF
    REOP_COUNT,
} REOPCodeEnum;
|
|
|
|
/* upper bound on the number of captures; also the size of the capture
   bitmap used by re_check_advance() */
#define CAPTURE_COUNT_MAX 255
/* NOTE(review): not referenced in the visible part of the file;
   presumably bounds the execution stack size stored in the header */
#define STACK_SIZE_MAX 255

/* unicode code points */
#define CP_LS   0x2028 /* LINE SEPARATOR */
#define CP_PS   0x2029 /* PARAGRAPH SEPARATOR */

/* size of the error message / scratch buffer of REParseState */
#define TMP_BUF_SIZE 128
|
|
|
|
/* State shared by all the functions that parse a pattern and emit its
   bytecode. */
typedef struct {
    DynBuf byte_code;           /* bytecode emitted so far */
    const uint8_t *buf_ptr;     /* current parse position in the pattern */
    const uint8_t *buf_end;     /* end of the pattern */
    const uint8_t *buf_start;   /* start of the pattern */
    int re_flags;               /* NOTE(review): presumably the LRE_FLAG_x
                                   bits given by the caller - confirm */
    BOOL is_utf16;              /* selects the "unicode regexp" parsing rules
                                   (stricter escapes, \p{...}, surrogates) */
    BOOL ignore_case;           /* character classes are canonicalized */
    BOOL dotall;                /* '.' emits REOP_any instead of REOP_dot */
    int capture_count;          /* number of captures parsed so far */
    int total_capture_count; /* -1 = not computed yet */
    int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
    void *opaque;               /* passed to cr_init() as allocator context */
    DynBuf group_names;         /* one NUL terminated name per capture, in
                                   order; unnamed captures store an empty
                                   name */
    union {
        /* both members share the same storage: formatting an error
           message overwrites the scratch buffer and vice versa */
        char error_msg[TMP_BUF_SIZE];
        char tmp_buf[TMP_BUF_SIZE];
    } u;
} REParseState;
|
|
|
|
/* Static description of one opcode. */
typedef struct {
#ifdef DUMP_REOP
    const char *name;   /* opcode name, only kept for the bytecode dumper */
#endif
    uint8_t size;       /* instruction size in bytes as given by the opcode
                           table; the range opcodes carry an additional
                           variable length payload */
} REOpCode;
|
|
|
|
/* Opcode descriptions indexed by REOPCodeEnum value. Generated from the
   same libregexp-opcode.inc list as the enum so that both stay in sync;
   the name is only stored when DUMP_REOP is defined. */
static const REOpCode reopcode_info[REOP_COUNT] = {
#ifdef DUMP_REOP
#define DEF(id, size) { #id, size },
#else
#define DEF(id, size) { size },
#endif
#include "third_party/quickjs/libregexp-opcode.inc"
#undef DEF
};
|
|
|
|
/* Byte offsets of the fields of the compiled regexp header. As read by
   lre_dump_bytecode(), a 32 bit bytecode length is stored at offset 3,
   which gives a header of 7 bytes in total. */
#define RE_HEADER_FLAGS         0
#define RE_HEADER_CAPTURE_COUNT 1
#define RE_HEADER_STACK_SIZE    2

#define RE_HEADER_LEN 7
|
|
|
|
/* Return 1 if 'c' is an ASCII decimal digit ('0'..'9'), 0 otherwise. */
static inline int is_digit(int c) {
    return !(c < '0' || c > '9');
}
|
|
|
|
/* insert 'len' bytes at position 'pos'. Return < 0 if error. */
|
|
static int dbuf_insert(DynBuf *s, int pos, int len)
|
|
{
|
|
if (dbuf_realloc(s, s->size + len))
|
|
return -1;
|
|
memmove(s->buf + pos + len, s->buf + pos, s->size - pos);
|
|
s->size += len;
|
|
return 0;
|
|
}
|
|
|
|
/* canonicalize with the specific JS regexp rules */
|
|
static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
|
|
{
|
|
uint32_t res[LRE_CC_RES_LEN_MAX];
|
|
int len;
|
|
if (is_utf16) {
|
|
if (LIKELY(c < 128)) {
|
|
if (c >= 'A' && c <= 'Z')
|
|
c = c - 'A' + 'a';
|
|
} else {
|
|
lre_case_conv(res, c, 2);
|
|
c = res[0];
|
|
}
|
|
} else {
|
|
if (LIKELY(c < 128)) {
|
|
if (c >= 'a' && c <= 'z')
|
|
c = c - 'a' + 'A';
|
|
} else {
|
|
/* legacy regexp: to upper case if single char >= 128 */
|
|
len = lre_case_conv(res, c, FALSE);
|
|
if (len == 1 && res[0] >= 128)
|
|
c = res[0];
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/* \d class. Layout shared by the char_range_x tables: the number of
   intervals followed by one (first, last + 1) pair per interval. */
static const uint16_t char_range_d[] = {
    1,              /* interval count */
    '0', '9' + 1,
};
|
|
|
|
/* \s class: the Zs, Zl and Zp code points listed below plus the ASCII
   controls U+0009..U+000D and U+FEFF. Same layout as the other
   char_range_x tables: interval count, then (first, last + 1) pairs in
   increasing order. */
static const uint16_t char_range_s[] = {
    10,                     /* interval count */
    '\t',   '\r' + 1,       /* U+0009..U+000D */
    ' ',    ' ' + 1,
    0x00A0, 0x00A0 + 1,
    0x1680, 0x1680 + 1,
    0x2000, 0x200A + 1,
    /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
    /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
    0x2028, 0x2029 + 1,
    0x202F, 0x202F + 1,
    0x205F, 0x205F + 1,
    0x3000, 0x3000 + 1,
    /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
    0xFEFF, 0xFEFF + 1,
};
|
|
|
|
BOOL lre_is_space(int c)
|
|
{
|
|
int i, n, low, high;
|
|
n = (countof(char_range_s) - 1) / 2;
|
|
for(i = 0; i < n; i++) {
|
|
low = char_range_s[2 * i + 1];
|
|
if (c < low)
|
|
return FALSE;
|
|
high = char_range_s[2 * i + 2];
|
|
if (c < high)
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/* Bitmap over the ASCII range, 32 characters per word (bit n of word w
   stands for character 32 * w + n), with bits set for: $ A-Z _ a-z */
const uint32_t lre_id_start_table_ascii[4] = {
    0x00000000,     /* 0x00..0x1f: none */
    0x00000010,     /* 0x20..0x3f: $ */
    0x87FFFFFE,     /* 0x40..0x5f: A-Z _ */
    0x07FFFFFE,     /* 0x60..0x7f: a-z */
};
|
|
|
|
/* Bitmap over the ASCII range, 32 characters per word (bit n of word w
   stands for character 32 * w + n), with bits set for:
   $ 0-9 A-Z _ a-z */
const uint32_t lre_id_continue_table_ascii[4] = {
    0x00000000,     /* 0x00..0x1f: none */
    0x03FF0010,     /* 0x20..0x3f: $ 0-9 */
    0x87FFFFFE,     /* 0x40..0x5f: A-Z _ */
    0x07FFFFFE,     /* 0x60..0x7f: a-z */
};
|
|
|
|
|
|
/* \w class: interval count followed by (first, last + 1) pairs */
static const uint16_t char_range_w[] = {
    4,              /* interval count */
    '0', '9' + 1,
    'A', 'Z' + 1,
    '_', '_' + 1,
    'a', 'z' + 1,
};
|
|
|
|
/* value returned by get_class_atom() in place of a character when the
   atom is a character class (its ranges are then stored in 'cr') */
#define CLASS_RANGE_BASE 0x40000000
|
|
|
|
/* Identifiers of the predefined character classes as consumed by
   cr_init_char_range(): the value shifted right by one selects the
   table in char_range_table[] and the low bit requests the inverted
   class, hence each lower case class is immediately followed by its
   upper case (negated) counterpart. */
typedef enum {
    CHAR_RANGE_d,
    CHAR_RANGE_D,
    CHAR_RANGE_s,
    CHAR_RANGE_S,
    CHAR_RANGE_w,
    CHAR_RANGE_W,
} CharRangeEnum;
|
|
|
|
/* Predefined class tables indexed by (CharRangeEnum value >> 1); the
   order must match the order of the enum. */
static const uint16_t *char_range_table[] = {
    char_range_d,
    char_range_s,
    char_range_w,
};
|
|
|
|
/* Initialize 'cr' with the predefined class 'c' (a CharRangeEnum value):
   c >> 1 selects the table and the low bit of 'c' requests the inverted
   class. Return 0 if OK. On failure 'cr' is freed and -1 is returned;
   no error message is recorded in 's' by this function. */
static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
{
    const uint16_t *table = char_range_table[c >> 1];
    const int point_count = 2 * table[0];   /* two points per interval */
    int k;

    cr_init(cr, s->opaque, lre_realloc);
    /* the points start right after the leading interval count */
    for (k = 1; k <= point_count; k++) {
        if (cr_add_point(cr, table[k]))
            goto release;
    }
    if ((c & 1) && cr_invert(cr))
        goto release;
    return 0;
 release:
    cr_free(cr);
    return -1;
}
|
|
|
|
/* Case-insensitive adjustment of a character class: the part of 'cr'
   that lies in 'a'..'z' is shifted to 'A'..'Z' and merged back into
   'cr'. The lower case ranges are kept for simplicity. Only ASCII
   letters are handled (the generic unicode case would be much more
   complicated). Return 0 if OK, otherwise the non zero result of the
   failing cr_op()/cr_union1() call. */
static int cr_canonicalize(CharRange *cr)
{
    CharRange upper;
    uint32_t lower_az[2] = { 'a', 'z' + 1 };
    int k, err;

    cr_init(&upper, cr->mem_opaque, lre_realloc);
    err = cr_op(&upper, cr->points, cr->len, lower_az, 2, CR_OP_INTER);
    if (!err) {
        /* convert to upper case */
        for (k = 0; k < upper.len; k++)
            upper.points[k] += 'A' - 'a';
        err = cr_union1(cr, upper.points, upper.len);
    }
    cr_free(&upper);
    return err;
}
|
|
|
|
#ifdef DUMP_REOP
|
|
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
|
int buf_len)
|
|
{
|
|
int pos, len, opcode, bc_len, re_flags, i;
|
|
uint32_t val;
|
|
|
|
assert(buf_len >= RE_HEADER_LEN);
|
|
|
|
re_flags= buf[0];
|
|
bc_len = get_u32(buf + 3);
|
|
assert(bc_len + RE_HEADER_LEN <= buf_len);
|
|
printf("flags: 0x%x capture_count=%d stack_size=%d\n",
|
|
re_flags, buf[1], buf[2]);
|
|
if (re_flags & LRE_FLAG_NAMED_GROUPS) {
|
|
const char *p;
|
|
p = (char *)buf + RE_HEADER_LEN + bc_len;
|
|
printf("named groups: ");
|
|
for(i = 1; i < buf[1]; i++) {
|
|
if (i != 1)
|
|
printf(",");
|
|
printf("<%s>", p);
|
|
p += strlen(p) + 1;
|
|
}
|
|
printf("\n");
|
|
assert(p == (char *)(buf + buf_len));
|
|
}
|
|
printf("bytecode_len=%d\n", bc_len);
|
|
|
|
buf += RE_HEADER_LEN;
|
|
pos = 0;
|
|
while (pos < bc_len) {
|
|
printf("%5u: ", pos);
|
|
opcode = buf[pos];
|
|
len = reopcode_info[opcode].size;
|
|
if (opcode >= REOP_COUNT) {
|
|
printf(" invalid opcode=0x%02x\n", opcode);
|
|
break;
|
|
}
|
|
if ((pos + len) > bc_len) {
|
|
printf(" buffer overflow (opcode=0x%02x)\n", opcode);
|
|
break;
|
|
}
|
|
printf("%s", reopcode_info[opcode].name);
|
|
switch(opcode) {
|
|
case REOP_char:
|
|
val = get_u16(buf + pos + 1);
|
|
if (val >= ' ' && val <= 126)
|
|
printf(" '%c'", val);
|
|
else
|
|
printf(" 0x%04x", val);
|
|
break;
|
|
case REOP_char32:
|
|
val = get_u32(buf + pos + 1);
|
|
if (val >= ' ' && val <= 126)
|
|
printf(" '%c'", val);
|
|
else
|
|
printf(" 0x%08x", val);
|
|
break;
|
|
case REOP_goto:
|
|
case REOP_split_goto_first:
|
|
case REOP_split_next_first:
|
|
case REOP_loop:
|
|
case REOP_lookahead:
|
|
case REOP_negative_lookahead:
|
|
case REOP_bne_char_pos:
|
|
val = get_u32(buf + pos + 1);
|
|
val += (pos + 5);
|
|
printf(" %u", val);
|
|
break;
|
|
case REOP_simple_greedy_quant:
|
|
printf(" %u %u %u %u",
|
|
get_u32(buf + pos + 1) + (pos + 17),
|
|
get_u32(buf + pos + 1 + 4),
|
|
get_u32(buf + pos + 1 + 8),
|
|
get_u32(buf + pos + 1 + 12));
|
|
break;
|
|
case REOP_save_start:
|
|
case REOP_save_end:
|
|
case REOP_back_reference:
|
|
case REOP_backward_back_reference:
|
|
printf(" %u", buf[pos + 1]);
|
|
break;
|
|
case REOP_save_reset:
|
|
printf(" %u %u", buf[pos + 1], buf[pos + 2]);
|
|
break;
|
|
case REOP_push_i32:
|
|
val = get_u32(buf + pos + 1);
|
|
printf(" %d", val);
|
|
break;
|
|
case REOP_range:
|
|
{
|
|
int n, i;
|
|
n = get_u16(buf + pos + 1);
|
|
len += n * 4;
|
|
for(i = 0; i < n * 2; i++) {
|
|
val = get_u16(buf + pos + 3 + i * 2);
|
|
printf(" 0x%04x", val);
|
|
}
|
|
}
|
|
break;
|
|
case REOP_range32:
|
|
{
|
|
int n, i;
|
|
n = get_u16(buf + pos + 1);
|
|
len += n * 8;
|
|
for(i = 0; i < n * 2; i++) {
|
|
val = get_u32(buf + pos + 3 + i * 4);
|
|
printf(" 0x%08x", val);
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
printf("\n");
|
|
pos += len;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* Append the opcode 'op' (one byte) to the bytecode. */
static void re_emit_op(REParseState *s, int op)
{
    DynBuf *bc = &s->byte_code;

    dbuf_putc(bc, op);
}
|
|
|
|
/* return the offset of the u32 value */
|
|
static int re_emit_op_u32(REParseState *s, int op, uint32_t val)
|
|
{
|
|
int pos;
|
|
dbuf_putc(&s->byte_code, op);
|
|
pos = s->byte_code.size;
|
|
dbuf_put_u32(&s->byte_code, val);
|
|
return pos;
|
|
}
|
|
|
|
/* Append the jump opcode 'op' targeting the absolute bytecode offset
   'val'. The operand is stored relative to the end of the instruction.
   Return the bytecode offset of the operand. */
static int re_emit_goto(REParseState *s, int op, uint32_t val)
{
    DynBuf *bc = &s->byte_code;
    int operand_pos;

    dbuf_putc(bc, op);
    operand_pos = bc->size;
    dbuf_put_u32(bc, val - (operand_pos + 4));
    return operand_pos;
}
|
|
|
|
/* Append the opcode 'op' followed by 'val' written as a single byte. */
static void re_emit_op_u8(REParseState *s, int op, uint32_t val)
{
    DynBuf *bc = &s->byte_code;

    dbuf_putc(bc, op);
    dbuf_putc(bc, val);
}
|
|
|
|
/* Append the opcode 'op' followed by 'val' written as a 16 bit value. */
static void re_emit_op_u16(REParseState *s, int op, uint32_t val)
{
    DynBuf *bc = &s->byte_code;

    dbuf_putc(bc, op);
    dbuf_put_u16(bc, val);
}
|
|
|
|
/* Format a printf-style error message into s->u.error_msg (truncated to
   the size of that buffer; it shares its storage with s->u.tmp_buf).
   Always return -1 so that callers can write
   'return re_parse_error(...)'. */
static int __attribute__((format(printf, 2, 3))) re_parse_error(REParseState *s, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vsnprintf(s->u.error_msg, sizeof s->u.error_msg, fmt, args);
    va_end(args);
    return -1;
}
|
|
|
|
/* Record the "out of memory" error message in 's'. Always return -1. */
static int re_parse_out_of_memory(REParseState *s)
{
    int status = re_parse_error(s, "out of memory");

    return status;
}
|
|
|
|
/* If allow_overflow is false, return -1 in case of
|
|
overflow. Otherwise return INT32_MAX. */
|
|
static int parse_digits(const uint8_t **pp, BOOL allow_overflow)
|
|
{
|
|
const uint8_t *p;
|
|
uint64_t v;
|
|
int c;
|
|
|
|
p = *pp;
|
|
v = 0;
|
|
for(;;) {
|
|
c = *p;
|
|
if (c < '0' || c > '9')
|
|
break;
|
|
v = v * 10 + c - '0';
|
|
if (v >= INT32_MAX) {
|
|
if (allow_overflow)
|
|
v = INT32_MAX;
|
|
else
|
|
return -1;
|
|
}
|
|
p++;
|
|
}
|
|
*pp = p;
|
|
return v;
|
|
}
|
|
|
|
/* Check that the character at *pp is 'c'. If so skip it and return 0,
   otherwise record an "expecting" error in 's', leave *pp unchanged and
   return -1. */
static int re_parse_expect(REParseState *s, const uint8_t **pp, int c)
{
    if (**pp != c)
        return re_parse_error(s, "expecting '%c'", c);
    ++*pp;
    return 0;
}
|
|
|
|
/* Parse an escape sequence, *pp points after the '\':
   allow_utf16 value:
   0 : no UTF-16 escapes allowed
   1 : UTF-16 escapes allowed
   2 : UTF-16 escapes allowed and escapes of surrogate pairs are
   converted to a unicode character (unicode regexp case).

   Return the unicode char and update *pp if recognized,
   return -1 if malformed escape,
   return -2 otherwise. *pp is left unchanged in both error cases. */
int lre_parse_escape(const uint8_t **pp, int allow_utf16)
{
    const uint8_t *cur = *pp;
    uint32_t ch = *cur++;

    switch(ch) {
    case 'b': ch = '\b'; break;
    case 'f': ch = '\f'; break;
    case 'e': ch = '\e'; break; /* [jart] love this */
    case 'n': ch = '\n'; break;
    case 'r': ch = '\r'; break;
    case 't': ch = '\t'; break;
    case 'v': ch = '\v'; break;
    case 'x':
    case 'u':
        {
            int digit, ndigits, k;
            uint32_t low;

            if (*cur == '{' && allow_utf16) {
                /* braced form: at least one hex digit, value limited to
                   the unicode range */
                cur++;
                ch = 0;
                do {
                    digit = from_hex(*cur++);
                    if (digit < 0)
                        return -1;
                    ch = (ch << 4) | digit;
                    if (ch > 0x10FFFF)
                        return -1;
                } while (*cur != '}');
                cur++;
            } else {
                /* fixed width form: \xHH or \uHHHH */
                ndigits = (ch == 'x') ? 2 : 4;
                ch = 0;
                for(k = 0; k < ndigits; k++) {
                    digit = from_hex(*cur++);
                    if (digit < 0)
                        return -1;
                    ch = (ch << 4) | digit;
                }
                if (ch >= 0xd800 && ch < 0xdc00 &&
                    allow_utf16 == 2 && cur[0] == '\\' && cur[1] == 'u') {
                    /* a high surrogate followed by an escaped low
                       surrogate is merged into a single code point */
                    low = 0;
                    for(k = 0; k < 4; k++) {
                        digit = from_hex(cur[2 + k]);
                        if (digit < 0)
                            break;
                        low = (low << 4) | digit;
                    }
                    if (k == 4 && low >= 0xdc00 && low < 0xe000) {
                        cur += 6;
                        ch = (((ch & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
                    }
                }
            }
        }
        break;
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
        ch -= '0';
        if (allow_utf16 == 2) {
            /* only accept \0 not followed by digit */
            if (ch != 0 || is_digit(*cur))
                return -1;
        } else {
            /* legacy octal sequence: up to three digits, the third one
               being accepted only when the first two are below 040 */
            uint32_t oct;

            oct = *cur - '0';
            if (oct > 7)
                break;
            ch = (ch << 3) | oct;
            cur++;
            if (ch >= 32)
                break;
            oct = *cur - '0';
            if (oct > 7)
                break;
            ch = (ch << 3) | oct;
            cur++;
        }
        break;
    default:
        return -2;
    }
    *pp = cur;
    return ch;
}
|
|
|
|
#ifdef CONFIG_ALL_UNICODE
|
|
/* XXX: we use the same chars for name and value */
|
|
static BOOL is_unicode_char(int c)
|
|
{
|
|
return ((c >= '0' && c <= '9') ||
|
|
(c >= 'A' && c <= 'Z') ||
|
|
(c >= 'a' && c <= 'z') ||
|
|
(c == '_'));
|
|
}
|
|
|
|
/* Parse the "{name}" or "{name=value}" part of a \p or \P escape; *pp
   points just after the 'p' / 'P'.

   Accepted forms:
   - Script=x / sc=x and Script_Extensions=x / scx=x
   - General_Category=x / gc=x
   - a lone name, tried first as a general category and then as a binary
     property

   On success 'cr' is initialized with the matching code points (inverted
   if is_inv is true), *pp is moved after the '}' and 0 is returned. On
   failure 'cr' is left freed or untouched, an error message is recorded
   in 's' and -1 is returned. */
static int parse_unicode_property(REParseState *s, CharRange *cr,
                                  const uint8_t **pp, BOOL is_inv)
{
    const uint8_t *p;
    char name[64], value[64];
    char *q;
    BOOL script_ext;
    int ret;

    p = *pp;
    if (*p != '{')
        return re_parse_error(s, "expecting '{' after \\p");
    p++;
    q = name;
    while (is_unicode_char(*p)) {
        /* keep one byte for the trailing NUL */
        if ((size_t)(q - name) >= sizeof(name) - 1)
            goto unknown_property_name;
        *q++ = *p++;
    }
    *q = '\0';
    q = value;
    if (*p == '=') {
        p++;
        while (is_unicode_char(*p)) {
            if ((size_t)(q - value) >= sizeof(value) - 1)
                return re_parse_error(s, "unknown unicode property value");
            *q++ = *p++;
        }
    }
    *q = '\0';
    if (*p != '}')
        return re_parse_error(s, "expecting '}'");
    p++;

    if (!strcmp(name, "Script") || !strcmp(name, "sc")) {
        script_ext = FALSE;
        goto do_script;
    } else if (!strcmp(name, "Script_Extensions") || !strcmp(name, "scx")) {
        script_ext = TRUE;
    do_script:
        cr_init(cr, s->opaque, lre_realloc);
        ret = unicode_script(cr, value, script_ext);
        if (ret) {
            cr_free(cr);
            if (ret == -2)
                return re_parse_error(s, "unknown unicode script");
            else
                goto out_of_memory;
        }
    } else if (!strcmp(name, "General_Category") || !strcmp(name, "gc")) {
        cr_init(cr, s->opaque, lre_realloc);
        ret = unicode_general_category(cr, value);
        if (ret) {
            cr_free(cr);
            if (ret == -2)
                return re_parse_error(s, "unknown unicode general category");
            else
                goto out_of_memory;
        }
    } else if (value[0] == '\0') {
        /* lone name: general category first, then binary property */
        cr_init(cr, s->opaque, lre_realloc);
        ret = unicode_general_category(cr, name);
        if (ret == -1) {
            cr_free(cr);
            goto out_of_memory;
        }
        if (ret < 0) {
            ret = unicode_prop(cr, name);
            if (ret) {
                cr_free(cr);
                if (ret == -2)
                    goto unknown_property_name;
                else
                    goto out_of_memory;
            }
        }
    } else {
    unknown_property_name:
        return re_parse_error(s, "unknown unicode property name");
    }

    if (is_inv) {
        if (cr_invert(cr)) {
            cr_free(cr);
            /* record a message like every other failure path does;
               otherwise the caller would report whatever is left in the
               error buffer */
            goto out_of_memory;
        }
    }
    *pp = p;
    return 0;
 out_of_memory:
    return re_parse_out_of_memory(s);
}
|
|
#endif /* CONFIG_ALL_UNICODE */
|
|
|
|
/* return -1 if error otherwise the character or a class range
|
|
(CLASS_RANGE_BASE). In case of class range, 'cr' is
|
|
initialized. Otherwise, it is ignored. */
|
|
static int get_class_atom(REParseState *s, CharRange *cr,
|
|
const uint8_t **pp, BOOL inclass)
|
|
{
|
|
const uint8_t *p;
|
|
uint32_t c;
|
|
int ret;
|
|
|
|
p = *pp;
|
|
|
|
c = *p;
|
|
switch(c) {
|
|
case '\\':
|
|
p++;
|
|
if (p >= s->buf_end)
|
|
goto unexpected_end;
|
|
c = *p++;
|
|
switch(c) {
|
|
case 'd':
|
|
c = CHAR_RANGE_d;
|
|
goto class_range;
|
|
case 'D':
|
|
c = CHAR_RANGE_D;
|
|
goto class_range;
|
|
case 's':
|
|
c = CHAR_RANGE_s;
|
|
goto class_range;
|
|
case 'S':
|
|
c = CHAR_RANGE_S;
|
|
goto class_range;
|
|
case 'w':
|
|
c = CHAR_RANGE_w;
|
|
goto class_range;
|
|
case 'W':
|
|
c = CHAR_RANGE_W;
|
|
class_range:
|
|
if (cr_init_char_range(s, cr, c))
|
|
return -1;
|
|
c = CLASS_RANGE_BASE;
|
|
break;
|
|
case 'c':
|
|
c = *p;
|
|
if ((c >= 'a' && c <= 'z') ||
|
|
(c >= 'A' && c <= 'Z') ||
|
|
(((c >= '0' && c <= '9') || c == '_') &&
|
|
inclass && !s->is_utf16)) { /* Annex B.1.4 */
|
|
c &= 0x1f;
|
|
p++;
|
|
} else if (s->is_utf16) {
|
|
goto invalid_escape;
|
|
} else {
|
|
/* otherwise return '\' and 'c' */
|
|
p--;
|
|
c = '\\';
|
|
}
|
|
break;
|
|
#ifdef CONFIG_ALL_UNICODE
|
|
case 'p':
|
|
case 'P':
|
|
if (s->is_utf16) {
|
|
if (parse_unicode_property(s, cr, &p, (c == 'P')))
|
|
return -1;
|
|
c = CLASS_RANGE_BASE;
|
|
break;
|
|
}
|
|
/* fall thru */
|
|
#endif
|
|
default:
|
|
p--;
|
|
ret = lre_parse_escape(&p, s->is_utf16 * 2);
|
|
if (ret >= 0) {
|
|
c = ret;
|
|
} else {
|
|
if (ret == -2 && *p != '\0' && strchr("^$\\.*+?()[]{}|/", *p)) {
|
|
/* always valid to escape these characters */
|
|
goto normal_char;
|
|
} else if (s->is_utf16) {
|
|
invalid_escape:
|
|
return re_parse_error(s, "invalid escape sequence in regular expression");
|
|
} else {
|
|
/* just ignore the '\' */
|
|
goto normal_char;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case '\0':
|
|
if (p >= s->buf_end) {
|
|
unexpected_end:
|
|
return re_parse_error(s, "unexpected end");
|
|
}
|
|
/* fall thru */
|
|
default:
|
|
normal_char:
|
|
/* normal char */
|
|
if (c >= 128) {
|
|
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
|
|
if ((unsigned)c > 0xffff && !s->is_utf16) {
|
|
/* XXX: should handle non BMP-1 code points */
|
|
return re_parse_error(s, "malformed unicode char");
|
|
}
|
|
} else {
|
|
p++;
|
|
}
|
|
break;
|
|
}
|
|
*pp = p;
|
|
return c;
|
|
}
|
|
|
|
/* Emit the opcode matching the character set 'cr' (a list of
   [first, last + 1) points):
   - empty set: a REOP_char32 with operand -1, i.e. a match that is
     always false;
   - all finite bounds <= 0xffff: REOP_range with inclusive 16 bit
     bounds, 0xffff standing for infinity;
   - otherwise: REOP_range32 with inclusive 32 bit bounds.
   Return 0 if OK, -1 (with an error message) if there are too many
   intervals. */
static int re_emit_range(REParseState *s, const CharRange *cr)
{
    DynBuf *bc = &s->byte_code;
    int interval_count, k;
    uint32_t top, last;

    interval_count = (unsigned)cr->len / 2;
    if (interval_count >= 65535)
        return re_parse_error(s, "too many ranges");
    if (interval_count == 0) {
        /* not sure it can really happen */
        re_emit_op_u32(s, REOP_char32, -1);
        return 0;
    }

    /* highest finite bound: skip a trailing "infinite" end point */
    top = cr->points[cr->len - 1];
    if (top == UINT32_MAX)
        top = cr->points[cr->len - 2];

    if (top > 0xffff) {
        re_emit_op_u16(s, REOP_range32, interval_count);
        for(k = 0; k < cr->len; k += 2) {
            dbuf_put_u32(bc, cr->points[k]);
            dbuf_put_u32(bc, cr->points[k + 1] - 1);
        }
        return 0;
    }

    re_emit_op_u16(s, REOP_range, interval_count);
    for(k = 0; k < cr->len; k += 2) {
        dbuf_put_u16(bc, cr->points[k]);
        last = cr->points[k + 1] - 1;
        if (last == UINT32_MAX - 1)
            last = 0xffff;  /* infinity in the 16 bit encoding */
        dbuf_put_u16(bc, last);
    }
    return 0;
}
|
|
|
|
/* Parse a bracketed character class; *pp points to the '['. The atoms
   and "a-b" intervals are accumulated into a set which is canonicalized
   when ignore_case is set, inverted for "[^...]" and finally emitted
   with re_emit_range().

   Return 0 and move *pp after the ']' if OK, otherwise -1. */
static int re_parse_char_class(REParseState *s, const uint8_t **pp)
{
    const uint8_t *cur;
    uint32_t lo, hi;
    CharRange set_s, *set = &set_s;     /* accumulated class */
    CharRange atom_s, *atom = &atom_s;  /* ranges of a \d like atom */
    BOOL negated;

    cr_init(set, s->opaque, lre_realloc);
    cur = *pp + 1;  /* skip '[' */
    negated = FALSE;
    if (*cur == '^') {
        cur++;
        negated = TRUE;
    }
    while (*cur != ']') {
        lo = get_class_atom(s, atom, &cur, TRUE);
        if ((int)lo < 0)
            goto fail;
        if (*cur == '-' && cur[1] != ']') {
            /* possible interval: look at the atom after the '-' without
               committing to it yet */
            const uint8_t *after_dash = cur + 1;

            if (lo >= CLASS_RANGE_BASE) {
                if (s->is_utf16) {
                    cr_free(atom);
                    goto invalid_class_range;
                }
                /* Annex B: match '-' character */
                goto class_atom;
            }
            hi = get_class_atom(s, atom, &after_dash, TRUE);
            if ((int)hi < 0)
                goto fail;
            if (hi >= CLASS_RANGE_BASE) {
                cr_free(atom);
                if (s->is_utf16) {
                    goto invalid_class_range;
                }
                /* Annex B: match '-' character */
                goto class_atom;
            }
            cur = after_dash;
            if (hi < lo) {
            invalid_class_range:
                re_parse_error(s, "invalid class range");
                goto fail;
            }
            if (cr_union_interval(set, lo, hi))
                goto memory_error;
        } else {
        class_atom:
            if (lo >= CLASS_RANGE_BASE) {
                int err;

                err = cr_union1(set, atom->points, atom->len);
                cr_free(atom);
                if (err)
                    goto memory_error;
            } else {
                if (cr_union_interval(set, lo, lo))
                    goto memory_error;
            }
        }
    }
    if (s->ignore_case) {
        if (cr_canonicalize(set))
            goto memory_error;
    }
    if (negated) {
        if (cr_invert(set))
            goto memory_error;
    }
    if (re_emit_range(s, set))
        goto fail;
    cr_free(set);
    *pp = cur + 1;  /* skip ']' */
    return 0;
 memory_error:
    re_parse_out_of_memory(s);
 fail:
    cr_free(set);
    return -1;
}
|
|
|
|
/* Return:
|
|
1 if the opcodes in bc_buf[] always advance the character pointer.
|
|
0 if the character pointer may not be advanced.
|
|
-1 if the code may depend on side effects of its previous execution (backreference)
|
|
*/
|
|
static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
|
{
|
|
int pos, opcode, ret, len, i;
|
|
uint32_t val, last;
|
|
BOOL has_back_reference;
|
|
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
|
|
|
|
ret = -2; /* not known yet */
|
|
pos = 0;
|
|
has_back_reference = FALSE;
|
|
bzero(capture_bitmap, sizeof(capture_bitmap));
|
|
|
|
while (pos < bc_buf_len) {
|
|
opcode = bc_buf[pos];
|
|
len = reopcode_info[opcode].size;
|
|
switch(opcode) {
|
|
case REOP_range:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 4;
|
|
goto simple_char;
|
|
case REOP_range32:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 8;
|
|
goto simple_char;
|
|
case REOP_char:
|
|
case REOP_char32:
|
|
case REOP_dot:
|
|
case REOP_any:
|
|
simple_char:
|
|
if (ret == -2)
|
|
ret = 1;
|
|
break;
|
|
case REOP_line_start:
|
|
case REOP_line_end:
|
|
case REOP_push_i32:
|
|
case REOP_push_char_pos:
|
|
case REOP_drop:
|
|
case REOP_word_boundary:
|
|
case REOP_not_word_boundary:
|
|
case REOP_prev:
|
|
/* no effect */
|
|
break;
|
|
case REOP_save_start:
|
|
case REOP_save_end:
|
|
val = bc_buf[pos + 1];
|
|
capture_bitmap[val] |= 1;
|
|
break;
|
|
case REOP_save_reset:
|
|
{
|
|
val = bc_buf[pos + 1];
|
|
last = bc_buf[pos + 2];
|
|
while (val < last)
|
|
capture_bitmap[val++] |= 1;
|
|
}
|
|
break;
|
|
case REOP_back_reference:
|
|
case REOP_backward_back_reference:
|
|
val = bc_buf[pos + 1];
|
|
capture_bitmap[val] |= 2;
|
|
has_back_reference = TRUE;
|
|
break;
|
|
default:
|
|
/* safe behvior: we cannot predict the outcome */
|
|
if (ret == -2)
|
|
ret = 0;
|
|
break;
|
|
}
|
|
pos += len;
|
|
}
|
|
if (has_back_reference) {
|
|
/* check if there is back reference which references a capture
|
|
made in the some code */
|
|
for(i = 0; i < CAPTURE_COUNT_MAX; i++) {
|
|
if (capture_bitmap[i] == 3)
|
|
return -1;
|
|
}
|
|
}
|
|
if (ret == -2)
|
|
ret = 0;
|
|
return ret;
|
|
}
|
|
|
|
/* return -1 if a simple quantifier cannot be used. Otherwise return
|
|
the number of characters in the atom. */
|
|
static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
|
|
{
|
|
int pos, opcode, len, count;
|
|
uint32_t val;
|
|
|
|
count = 0;
|
|
pos = 0;
|
|
while (pos < bc_buf_len) {
|
|
opcode = bc_buf[pos];
|
|
len = reopcode_info[opcode].size;
|
|
switch(opcode) {
|
|
case REOP_range:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 4;
|
|
goto simple_char;
|
|
case REOP_range32:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 8;
|
|
goto simple_char;
|
|
case REOP_char:
|
|
case REOP_char32:
|
|
case REOP_dot:
|
|
case REOP_any:
|
|
simple_char:
|
|
count++;
|
|
break;
|
|
case REOP_line_start:
|
|
case REOP_line_end:
|
|
case REOP_word_boundary:
|
|
case REOP_not_word_boundary:
|
|
break;
|
|
default:
|
|
return -1;
|
|
}
|
|
pos += len;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/* '*pp' is the first char after '<' */
|
|
static int re_parse_group_name(char *buf, int buf_size,
|
|
const uint8_t **pp, BOOL is_utf16)
|
|
{
|
|
const uint8_t *p;
|
|
uint32_t c;
|
|
char *q;
|
|
|
|
p = *pp;
|
|
q = buf;
|
|
for(;;) {
|
|
c = *p;
|
|
if (c == '\\') {
|
|
p++;
|
|
if (*p != 'u')
|
|
return -1;
|
|
c = lre_parse_escape(&p, is_utf16 * 2);
|
|
} else if (c == '>') {
|
|
break;
|
|
} else if (c >= 128) {
|
|
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
|
|
} else {
|
|
p++;
|
|
}
|
|
if (c > 0x10FFFF)
|
|
return -1;
|
|
if (q == buf) {
|
|
if (!lre_js_is_ident_first(c))
|
|
return -1;
|
|
} else {
|
|
if (!lre_js_is_ident_next(c))
|
|
return -1;
|
|
}
|
|
if ((q - buf + UTF8_CHAR_LEN_MAX + 1) > buf_size)
|
|
return -1;
|
|
if (c < 128) {
|
|
*q++ = c;
|
|
} else {
|
|
q += unicode_to_utf8((uint8_t*)q, c);
|
|
}
|
|
}
|
|
if (q == buf)
|
|
return -1;
|
|
*q = '\0';
|
|
p++;
|
|
*pp = p;
|
|
return 0;
|
|
}
|
|
|
|
/* if capture_name = NULL: return the number of captures + 1.
|
|
Otherwise, return the capture index corresponding to capture_name
|
|
or -1 if none */
|
|
static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|
const char *capture_name)
|
|
{
|
|
const uint8_t *p;
|
|
int capture_index;
|
|
char name[TMP_BUF_SIZE];
|
|
|
|
capture_index = 1;
|
|
*phas_named_captures = 0;
|
|
for (p = s->buf_start; p < s->buf_end; p++) {
|
|
switch (*p) {
|
|
case '(':
|
|
if (p[1] == '?') {
|
|
if (p[2] == '<' && p[3] != '=' && p[3] != '!') {
|
|
*phas_named_captures = 1;
|
|
/* potential named capture */
|
|
if (capture_name) {
|
|
p += 3;
|
|
if (re_parse_group_name(name, sizeof(name), &p,
|
|
s->is_utf16) == 0) {
|
|
if (!strcmp(name, capture_name))
|
|
return capture_index;
|
|
}
|
|
}
|
|
capture_index++;
|
|
if (capture_index >= CAPTURE_COUNT_MAX)
|
|
goto done;
|
|
}
|
|
} else {
|
|
capture_index++;
|
|
if (capture_index >= CAPTURE_COUNT_MAX)
|
|
goto done;
|
|
}
|
|
break;
|
|
case '\\':
|
|
p++;
|
|
break;
|
|
case '[':
|
|
for (p += 1 + (*p == ']'); p < s->buf_end && *p != ']'; p++) {
|
|
if (*p == '\\')
|
|
p++;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
done:
|
|
if (capture_name)
|
|
return -1;
|
|
else
|
|
return capture_index;
|
|
}
|
|
|
|
/* Return the total capture count of the pattern as computed by
   re_parse_captures(). The result is cached in s->total_capture_count
   (-1 = not computed yet); computing it also sets
   s->has_named_captures. */
static int re_count_captures(REParseState *s)
{
    if (s->total_capture_count >= 0)
        return s->total_capture_count;
    s->total_capture_count =
        re_parse_captures(s, &s->has_named_captures, NULL);
    return s->total_capture_count;
}
|
|
|
|
/* Return whether the pattern contains named captures, scanning the
   pattern first if this is not known yet (has_named_captures < 0). */
static BOOL re_has_named_captures(REParseState *s)
{
    if (s->has_named_captures < 0) {
        /* the scan sets s->has_named_captures as a side effect */
        (void)re_count_captures(s);
    }
    return s->has_named_captures;
}
|
|
|
|
/* Look up 'name' among the group names recorded so far in
   s->group_names (a sequence of NUL terminated strings, one per capture,
   the first one having index 1). Return the capture index of the first
   match or -1 if the name is not found. */
static int find_group_name(REParseState *s, const char *name)
{
    const char *entry = (char *)s->group_names.buf;
    const char *limit = entry + s->group_names.size;
    int capture_index;

    for (capture_index = 1; entry < limit; capture_index++) {
        if (strcmp(entry, name) == 0)
            return capture_index;
        entry += strlen(entry) + 1;     /* skip the name and its NUL */
    }
    return -1;
}
|
|
|
|
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir);
|
|
|
|
/* Parse one term starting at s->buf_ptr and append its bytecode to
   s->byte_code. A term is either an assertion ('^', '$', '\b', '\B',
   lookaround) or an atom ('.', group, back reference, character class,
   literal character), optionally followed by a quantifier
   ('*', '+', '?', '{n}', '{n,}', '{n,m}', each with an optional lazy
   '?' suffix).

   When 'is_backward_dir' is true the atom is emitted for backward
   matching: single character atoms are wrapped in REOP_prev and the
   save/back reference opcodes are switched to their backward variant.

   Return 0 if OK, in which case s->buf_ptr points after the term.
   Return a non zero value on error (-1, or the value returned by
   re_parse_error() / re_parse_out_of_memory()). */
static int re_parse_term(REParseState *s, BOOL is_backward_dir)
{
    const uint8_t *p;
    int c, last_atom_start, quant_min, quant_max, last_capture_count;
    BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead;
    CharRange cr_s, *cr = &cr_s;

    /* last_atom_start stays negative when the term cannot take a
       quantifier; otherwise it is the bytecode offset of the atom */
    last_atom_start = -1;
    last_capture_count = 0;
    p = s->buf_ptr;
    c = *p;
    switch(c) {
    case '^':
        p++;
        re_emit_op(s, REOP_line_start);
        break;
    case '$':
        p++;
        re_emit_op(s, REOP_line_end);
        break;
    case '.':
        p++;
        last_atom_start = s->byte_code.size;
        last_capture_count = s->capture_count;
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        re_emit_op(s, s->dotall ? REOP_any : REOP_dot);
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        break;
    case '{':
        if (s->is_utf16) {
            return re_parse_error(s, "syntax error");
        } else if (!is_digit(p[1])) {
            /* Annex B: we accept '{' not followed by digits as a
               normal atom */
            goto parse_class_atom;
        } else {
            const uint8_t *p1 = p + 1;
            /* Annex B: error if it is like a repetition count */
            parse_digits(&p1, TRUE);
            if (*p1 == ',') {
                p1++;
                if (is_digit(*p1)) {
                    parse_digits(&p1, TRUE);
                }
            }
            if (*p1 != '}') {
                goto parse_class_atom;
            }
        }
        /* fall thru */
    case '*':
    case '+':
    case '?':
        return re_parse_error(s, "nothing to repeat");
    case '(':
        if (p[1] == '?') {
            if (p[2] == ':') {
                /* non capturing group */
                p += 3;
                last_atom_start = s->byte_code.size;
                last_capture_count = s->capture_count;
                s->buf_ptr = p;
                if (re_parse_disjunction(s, is_backward_dir))
                    return -1;
                p = s->buf_ptr;
                if (re_parse_expect(s, &p, ')'))
                    return -1;
            } else if ((p[2] == '=' || p[2] == '!')) {
                is_neg = (p[2] == '!');
                is_backward_lookahead = FALSE;
                p += 3;
                goto lookahead;
            } else if (p[2] == '<' &&
                       (p[3] == '=' || p[3] == '!')) {
                int pos;
                is_neg = (p[3] == '!');
                is_backward_lookahead = TRUE;
                p += 4;
                /* lookahead */
            lookahead:
                /* Annex B allows lookahead to be used as an atom for
                   the quantifiers */
                if (!s->is_utf16 && !is_backward_lookahead) {
                    last_atom_start = s->byte_code.size;
                    last_capture_count = s->capture_count;
                }
                /* the 32 bit operand is patched below once the length
                   of the assertion body is known */
                pos = re_emit_op_u32(s, REOP_lookahead + is_neg, 0);
                s->buf_ptr = p;
                if (re_parse_disjunction(s, is_backward_lookahead))
                    return -1;
                p = s->buf_ptr;
                if (re_parse_expect(s, &p, ')'))
                    return -1;
                re_emit_op(s, REOP_match);
                /* jump after the 'match' after the lookahead is successful */
                /* the buffer must be valid before it is patched; report
                   the failure like the other allocation errors of this
                   function so that an error message is recorded */
                if (dbuf_error(&s->byte_code))
                    goto out_of_memory;
                put_u32(s->byte_code.buf + pos, s->byte_code.size - (pos + 4));
            } else if (p[2] == '<') {
                /* named capture group */
                p += 3;
                if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
                                        &p, s->is_utf16)) {
                    return re_parse_error(s, "invalid group name");
                }
                if (find_group_name(s, s->u.tmp_buf) > 0) {
                    return re_parse_error(s, "duplicate group name");
                }
                /* group name with a trailing zero */
                dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf,
                         strlen(s->u.tmp_buf) + 1);
                s->has_named_captures = 1;
                goto parse_capture;
            } else {
                return re_parse_error(s, "invalid group");
            }
        } else {
            int capture_index;
            p++;
            /* capture without group name */
            dbuf_putc(&s->group_names, 0);
        parse_capture:
            if (s->capture_count >= CAPTURE_COUNT_MAX)
                return re_parse_error(s, "too many captures");
            last_atom_start = s->byte_code.size;
            last_capture_count = s->capture_count;
            capture_index = s->capture_count++;
            /* in backward direction the end of the capture is seen
               first, hence the swapped save opcodes */
            re_emit_op_u8(s, REOP_save_start + is_backward_dir,
                          capture_index);

            s->buf_ptr = p;
            if (re_parse_disjunction(s, is_backward_dir))
                return -1;
            p = s->buf_ptr;

            re_emit_op_u8(s, REOP_save_start + 1 - is_backward_dir,
                          capture_index);

            if (re_parse_expect(s, &p, ')'))
                return -1;
        }
        break;
    case '\\':
        switch(p[1]) {
        case 'b':
        case 'B':
            re_emit_op(s, REOP_word_boundary + (p[1] != 'b'));
            p += 2;
            break;
        case 'k':
            {
                const uint8_t *p1;
                int dummy_res;

                p1 = p;
                if (p1[2] != '<') {
                    /* annex B: we tolerate invalid group names in non
                       unicode mode if there is no named capture
                       definition */
                    if (s->is_utf16 || re_has_named_captures(s))
                        return re_parse_error(s, "expecting group name");
                    else
                        goto parse_class_atom;
                }
                p1 += 3;
                if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
                                        &p1, s->is_utf16)) {
                    if (s->is_utf16 || re_has_named_captures(s))
                        return re_parse_error(s, "invalid group name");
                    else
                        goto parse_class_atom;
                }
                c = find_group_name(s, s->u.tmp_buf);
                if (c < 0) {
                    /* no capture name parsed before, try to look
                       after (inefficient, but hopefully not common) */
                    c = re_parse_captures(s, &dummy_res, s->u.tmp_buf);
                    if (c < 0) {
                        if (s->is_utf16 || re_has_named_captures(s))
                            return re_parse_error(s, "group name not defined");
                        else
                            goto parse_class_atom;
                    }
                }
                p = p1;
            }
            goto emit_back_reference;
        case '0':
            p += 2;
            c = 0;
            if (s->is_utf16) {
                if (is_digit(*p)) {
                    return re_parse_error(s, "invalid decimal escape in regular expression");
                }
            } else {
                /* Annex B.1.4: accept legacy octal */
                if (*p >= '0' && *p <= '7') {
                    c = *p++ - '0';
                    if (*p >= '0' && *p <= '7') {
                        c = (c << 3) + *p++ - '0';
                    }
                }
            }
            goto normal_char;
        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8':
        case '9':
            {
                const uint8_t *q = ++p;

                c = parse_digits(&p, FALSE);
                if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) {
                    if (!s->is_utf16) {
                        /* Annex B.1.4: accept legacy octal */
                        p = q;
                        if (*p <= '7') {
                            c = 0;
                            if (*p <= '3')
                                c = *p++ - '0';
                            if (*p >= '0' && *p <= '7') {
                                c = (c << 3) + *p++ - '0';
                                if (*p >= '0' && *p <= '7') {
                                    c = (c << 3) + *p++ - '0';
                                }
                            }
                        } else {
                            c = *p++;
                        }
                        goto normal_char;
                    }
                    return re_parse_error(s, "back reference out of range in regular expression");
                }
            emit_back_reference:
                last_atom_start = s->byte_code.size;
                last_capture_count = s->capture_count;
                re_emit_op_u8(s, REOP_back_reference + is_backward_dir, c);
            }
            break;
        default:
            goto parse_class_atom;
        }
        break;
    case '[':
        last_atom_start = s->byte_code.size;
        last_capture_count = s->capture_count;
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        if (re_parse_char_class(s, &p))
            return -1;
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        break;
    case ']':
    case '}':
        if (s->is_utf16)
            return re_parse_error(s, "syntax error");
        goto parse_class_atom;
    default:
    parse_class_atom:
        c = get_class_atom(s, cr, &p, FALSE);
        if ((int)c < 0)
            return -1;
    normal_char:
        last_atom_start = s->byte_code.size;
        last_capture_count = s->capture_count;
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        if (c >= CLASS_RANGE_BASE) {
            int ret;
            /* Note: canonicalization is not needed */
            ret = re_emit_range(s, cr);
            cr_free(cr);
            if (ret)
                return -1;
        } else {
            if (s->ignore_case)
                c = lre_canonicalize(c, s->is_utf16);
            if (c <= 0xffff)
                re_emit_op_u16(s, REOP_char, c);
            else
                re_emit_op_u32(s, REOP_char32, c);
        }
        if (is_backward_dir)
            re_emit_op(s, REOP_prev);
        break;
    }

    /* quantifier */
    if (last_atom_start >= 0) {
        c = *p;
        switch(c) {
        case '*':
            p++;
            quant_min = 0;
            quant_max = INT32_MAX;
            goto quantifier;
        case '+':
            p++;
            quant_min = 1;
            quant_max = INT32_MAX;
            goto quantifier;
        case '?':
            p++;
            quant_min = 0;
            quant_max = 1;
            goto quantifier;
        case '{':
            {
                const uint8_t *p1 = p;
                /* As an extension (see ES6 annex B), we accept '{' not
                   followed by digits as a normal atom */
                if (!is_digit(p[1])) {
                    if (s->is_utf16)
                        goto invalid_quant_count;
                    break;
                }
                p++;
                quant_min = parse_digits(&p, TRUE);
                quant_max = quant_min;
                if (*p == ',') {
                    p++;
                    if (is_digit(*p)) {
                        quant_max = parse_digits(&p, TRUE);
                        if (quant_max < quant_min) {
                        invalid_quant_count:
                            return re_parse_error(s, "invalid repetition count");
                        }
                    } else {
                        quant_max = INT32_MAX; /* infinity */
                    }
                }
                if (*p != '}' && !s->is_utf16) {
                    /* Annex B: normal atom if invalid '{' syntax */
                    p = p1;
                    break;
                }
                if (re_parse_expect(s, &p, '}'))
                    return -1;
            }
        quantifier:
            /* Note: last_atom_start >= 0 is guaranteed here by the
               enclosing test, so there is always an atom to repeat */
            greedy = TRUE;
            if (*p == '?') {
                p++;
                greedy = FALSE;
            }
            if (greedy) {
                int len, pos;

                if (quant_max > 0) {
                    /* specific optimization for simple quantifiers */
                    if (dbuf_error(&s->byte_code))
                        goto out_of_memory;
                    len = re_is_simple_quantifier(s->byte_code.buf + last_atom_start,
                                                  s->byte_code.size - last_atom_start);
                    if (len > 0) {
                        re_emit_op(s, REOP_match);

                        /* 17 bytes: opcode + 4 x 32 bit operands
                           (atom length, min, max, 'len') */
                        if (dbuf_insert(&s->byte_code, last_atom_start, 17))
                            goto out_of_memory;
                        pos = last_atom_start;
                        s->byte_code.buf[pos++] = REOP_simple_greedy_quant;
                        put_u32(&s->byte_code.buf[pos],
                                s->byte_code.size - last_atom_start - 17);
                        pos += 4;
                        put_u32(&s->byte_code.buf[pos], quant_min);
                        pos += 4;
                        put_u32(&s->byte_code.buf[pos], quant_max);
                        pos += 4;
                        put_u32(&s->byte_code.buf[pos], len);
                        pos += 4;
                        goto done;
                    }
                }

                if (dbuf_error(&s->byte_code))
                    goto out_of_memory;
                add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
                                                           s->byte_code.size - last_atom_start) == 0);
            } else {
                add_zero_advance_check = FALSE;
            }

            {
                int len, pos;
                /* length of the atom before anything is inserted */
                len = s->byte_code.size - last_atom_start;
                if (quant_min == 0) {
                    /* need to reset the capture in case the atom is
                       not executed */
                    if (last_capture_count != s->capture_count) {
                        if (dbuf_insert(&s->byte_code, last_atom_start, 3))
                            goto out_of_memory;
                        s->byte_code.buf[last_atom_start++] = REOP_save_reset;
                        s->byte_code.buf[last_atom_start++] = last_capture_count;
                        s->byte_code.buf[last_atom_start++] = s->capture_count - 1;
                    }
                    if (quant_max == 0) {
                        /* the atom is never executed: drop its code */
                        s->byte_code.size = last_atom_start;
                    } else if (quant_max == 1) {
                        if (dbuf_insert(&s->byte_code, last_atom_start, 5))
                            goto out_of_memory;
                        s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
                            greedy;
                        put_u32(s->byte_code.buf + last_atom_start + 1, len);
                    } else if (quant_max == INT32_MAX) {
                        if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
                            goto out_of_memory;
                        s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
                            greedy;
                        put_u32(s->byte_code.buf + last_atom_start + 1,
                                len + 5 + add_zero_advance_check);
                        if (add_zero_advance_check) {
                            /* avoid infinite loop by stopping the
                               recursion if no advance was made in the
                               atom (only works if the atom has no
                               side effect) */
                            s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
                            re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
                        } else {
                            re_emit_goto(s, REOP_goto, last_atom_start);
                        }
                    } else {
                        /* bounded repetition: push the counter, then
                           loop on the split */
                        if (dbuf_insert(&s->byte_code, last_atom_start, 10))
                            goto out_of_memory;
                        pos = last_atom_start;
                        s->byte_code.buf[pos++] = REOP_push_i32;
                        put_u32(s->byte_code.buf + pos, quant_max);
                        pos += 4;
                        s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
                        put_u32(s->byte_code.buf + pos, len + 5);
                        re_emit_goto(s, REOP_loop, last_atom_start + 5);
                        re_emit_op(s, REOP_drop);
                    }
                } else if (quant_min == 1 && quant_max == INT32_MAX &&
                           !add_zero_advance_check) {
                    re_emit_goto(s, REOP_split_next_first - greedy,
                                 last_atom_start);
                } else {
                    if (quant_min == 1) {
                        /* nothing to add */
                    } else {
                        /* mandatory part: execute the atom quant_min
                           times with a counted loop */
                        if (dbuf_insert(&s->byte_code, last_atom_start, 5))
                            goto out_of_memory;
                        s->byte_code.buf[last_atom_start] = REOP_push_i32;
                        put_u32(s->byte_code.buf + last_atom_start + 1,
                                quant_min);
                        last_atom_start += 5;
                        re_emit_goto(s, REOP_loop, last_atom_start);
                        re_emit_op(s, REOP_drop);
                    }
                    if (quant_max == INT32_MAX) {
                        pos = s->byte_code.size;
                        re_emit_op_u32(s, REOP_split_goto_first + greedy,
                                       len + 5 + add_zero_advance_check);
                        if (add_zero_advance_check)
                            re_emit_op(s, REOP_push_char_pos);
                        /* copy the atom */
                        dbuf_put_self(&s->byte_code, last_atom_start, len);
                        if (add_zero_advance_check)
                            re_emit_goto(s, REOP_bne_char_pos, pos);
                        else
                            re_emit_goto(s, REOP_goto, pos);
                    } else if (quant_max > quant_min) {
                        re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
                        pos = s->byte_code.size;
                        re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
                        /* copy the atom */
                        dbuf_put_self(&s->byte_code, last_atom_start, len);

                        re_emit_goto(s, REOP_loop, pos);
                        re_emit_op(s, REOP_drop);
                    }
                }
                last_atom_start = -1;
            }
            break;
        default:
            break;
        }
    }
 done:
    s->buf_ptr = p;
    return 0;
 out_of_memory:
    return re_parse_out_of_memory(s);
}
|
|
|
|
/* Parse a sequence of terms up to the end of the pattern, a '|' or a
   ')' (the terminator is not consumed) and append their bytecode to
   s->byte_code.

   When 'is_backward_dir' is true, the bytecode of each new term is
   moved in front of the terms already emitted for this alternative so
   that the terms are executed in reverse order.

   Return 0 if OK, a non zero value on error. */
static int re_parse_alternative(REParseState *s, BOOL is_backward_dir)
{
    const uint8_t *p;
    int ret;
    size_t start, term_start, end, term_size;

    start = s->byte_code.size;
    for(;;) {
        p = s->buf_ptr;
        if (p >= s->buf_end)
            break;
        if (*p == '|' || *p == ')')
            break;
        term_start = s->byte_code.size;
        ret = re_parse_term(s, is_backward_dir);
        if (ret)
            return ret;
        if (is_backward_dir) {
            /* reverse the order of the terms (XXX: inefficient, but
               speed is not really critical here) */
            end = s->byte_code.size;
            term_size = end - term_start;
            /* reserve scratch space after the current end; report the
               failure with an error message like the other allocation
               errors of the parser */
            if (dbuf_realloc(&s->byte_code, end + term_size))
                return re_parse_out_of_memory(s);
            /* shift the whole alternative by term_size: the new term
               now lies in the scratch area starting at 'end' ... */
            memmove(s->byte_code.buf + start + term_size,
                    s->byte_code.buf + start,
                    end - start);
            /* ... and is copied back to the beginning */
            memcpy(s->byte_code.buf + start, s->byte_code.buf + end,
                   term_size);
        }
    }
    return 0;
}
|
|
|
|
/* Parse a disjunction, i.e. one or more alternatives separated by '|',
   and append its bytecode to s->byte_code. For each '|' a
   REOP_split_next_first is inserted before the code emitted so far and
   a REOP_goto jumping over the next alternative is appended.

   Return 0 if OK, a non zero value on error (including a failed
   lre_check_stack_overflow() check, since this function is called
   recursively for nested groups). */
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
{
    int start, len, pos;

    if (lre_check_stack_overflow(s->opaque, 0))
        return re_parse_error(s, "stack overflow");

    start = s->byte_code.size;
    if (re_parse_alternative(s, is_backward_dir))
        return -1;
    while (*s->buf_ptr == '|') {
        s->buf_ptr++;

        len = s->byte_code.size - start;

        /* insert a split before the first alternative */
        if (dbuf_insert(&s->byte_code, start, 5)) {
            return re_parse_out_of_memory(s);
        }
        s->byte_code.buf[start] = REOP_split_next_first;
        put_u32(s->byte_code.buf + start + 1, len + 5);

        /* the operand is patched once the next alternative is parsed */
        pos = re_emit_op_u32(s, REOP_goto, 0);

        if (re_parse_alternative(s, is_backward_dir))
            return -1;

        /* if an emit failed, 'pos' may be outside of the allocated
           buffer: do not patch it */
        if (dbuf_error(&s->byte_code))
            return re_parse_out_of_memory(s);

        /* patch the goto */
        len = s->byte_code.size - (pos + 4);
        put_u32(s->byte_code.buf + pos, len);
    }
    return 0;
}
|
|
|
|
/* the control flow is recursive so the analysis can be linear */
|
|
static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
|
|
{
|
|
int stack_size, stack_size_max, pos, opcode, len;
|
|
uint32_t val;
|
|
|
|
stack_size = 0;
|
|
stack_size_max = 0;
|
|
bc_buf += RE_HEADER_LEN;
|
|
bc_buf_len -= RE_HEADER_LEN;
|
|
pos = 0;
|
|
while (pos < bc_buf_len) {
|
|
opcode = bc_buf[pos];
|
|
len = reopcode_info[opcode].size;
|
|
assert(opcode < REOP_COUNT);
|
|
assert((pos + len) <= bc_buf_len);
|
|
switch(opcode) {
|
|
case REOP_push_i32:
|
|
case REOP_push_char_pos:
|
|
stack_size++;
|
|
if (stack_size > stack_size_max) {
|
|
if (stack_size > STACK_SIZE_MAX)
|
|
return -1;
|
|
stack_size_max = stack_size;
|
|
}
|
|
break;
|
|
case REOP_drop:
|
|
case REOP_bne_char_pos:
|
|
assert(stack_size > 0);
|
|
stack_size--;
|
|
break;
|
|
case REOP_range:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 4;
|
|
break;
|
|
case REOP_range32:
|
|
val = get_u16(bc_buf + pos + 1);
|
|
len += val * 8;
|
|
break;
|
|
}
|
|
pos += len;
|
|
}
|
|
return stack_size_max;
|
|
}
|
|
|
|
/* 'buf' must be a zero terminated UTF-8 string of length buf_len.
|
|
Return NULL if error and allocate an error message in *perror_msg,
|
|
otherwise the compiled bytecode and its length in plen.
|
|
*/
|
|
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
|
|
const char *buf, size_t buf_len, int re_flags,
|
|
void *opaque)
|
|
{
|
|
REParseState s_s, *s = &s_s;
|
|
int stack_size;
|
|
BOOL is_sticky;
|
|
|
|
bzero(s, sizeof(*s));
|
|
s->opaque = opaque;
|
|
s->buf_ptr = (const uint8_t *)buf;
|
|
s->buf_end = s->buf_ptr + buf_len;
|
|
s->buf_start = s->buf_ptr;
|
|
s->re_flags = re_flags;
|
|
s->is_utf16 = ((re_flags & LRE_FLAG_UTF16) != 0);
|
|
is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
|
|
s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
|
|
s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
|
|
s->capture_count = 1;
|
|
s->total_capture_count = -1;
|
|
s->has_named_captures = -1;
|
|
|
|
dbuf_init2(&s->byte_code, opaque, lre_realloc);
|
|
dbuf_init2(&s->group_names, opaque, lre_realloc);
|
|
|
|
dbuf_putc(&s->byte_code, re_flags); /* first element is the flags */
|
|
dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */
|
|
dbuf_putc(&s->byte_code, 0); /* stack size */
|
|
dbuf_put_u32(&s->byte_code, 0); /* bytecode length */
|
|
|
|
if (!is_sticky) {
|
|
/* iterate thru all positions (about the same as .*?( ... ) )
|
|
. We do it without an explicit loop so that lock step
|
|
thread execution will be possible in an optimized
|
|
implementation */
|
|
re_emit_op_u32(s, REOP_split_goto_first, 1 + 5);
|
|
re_emit_op(s, REOP_any);
|
|
re_emit_op_u32(s, REOP_goto, -(5 + 1 + 5));
|
|
}
|
|
re_emit_op_u8(s, REOP_save_start, 0);
|
|
|
|
if (re_parse_disjunction(s, FALSE)) {
|
|
error:
|
|
dbuf_free(&s->byte_code);
|
|
dbuf_free(&s->group_names);
|
|
pstrcpy(error_msg, error_msg_size, s->u.error_msg);
|
|
*plen = 0;
|
|
return NULL;
|
|
}
|
|
|
|
re_emit_op_u8(s, REOP_save_end, 0);
|
|
|
|
re_emit_op(s, REOP_match);
|
|
|
|
if (*s->buf_ptr != '\0') {
|
|
re_parse_error(s, "extraneous characters at the end");
|
|
goto error;
|
|
}
|
|
|
|
if (dbuf_error(&s->byte_code)) {
|
|
re_parse_out_of_memory(s);
|
|
goto error;
|
|
}
|
|
|
|
stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size);
|
|
if (stack_size < 0) {
|
|
re_parse_error(s, "too many imbricated quantifiers");
|
|
goto error;
|
|
}
|
|
|
|
s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count;
|
|
s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size;
|
|
put_u32(s->byte_code.buf + 3, s->byte_code.size - RE_HEADER_LEN);
|
|
|
|
/* add the named groups if needed */
|
|
if (s->group_names.size > (s->capture_count - 1)) {
|
|
dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size);
|
|
s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS;
|
|
}
|
|
dbuf_free(&s->group_names);
|
|
|
|
#ifdef DUMP_REOP
|
|
lre_dump_bytecode(s->byte_code.buf, s->byte_code.size);
|
|
#endif
|
|
|
|
error_msg[0] = '\0';
|
|
*plen = s->byte_code.size;
|
|
return s->byte_code.buf;
|
|
}
|
|
|
|
static BOOL is_line_terminator(uint32_t c)
|
|
{
|
|
return (c == '\n' || c == '\r' || c == CP_LS || c == CP_PS);
|
|
}
|
|
|
|
/* Return true if the code point 'c' is an ASCII digit, an ASCII
   letter or '_' (the characters considered by the word boundary
   assertions). */
static BOOL is_word_char(uint32_t c)
{
    BOOL is_decimal, is_lower, is_upper;

    is_decimal = (c >= '0' && c <= '9');
    is_lower = (c >= 'a' && c <= 'z');
    is_upper = (c >= 'A' && c <= 'Z');
    return (is_decimal || is_lower || is_upper || c == '_');
}
|
|
|
|
/* Read the character at 'cptr' into 'c' and advance 'cptr' after it.
   An integer variable 'cbuf_type' must be in scope: 0 = 8 bit chars,
   otherwise 16 bit chars; when it is 2, a high surrogate followed by a
   low surrogate located before 'cbuf_end' is combined into a single
   code point. The caller must ensure that cptr < cbuf_end. */
#define GET_CHAR(c, cptr, cbuf_end) \
    do { \
        if (cbuf_type == 0) { \
            c = *cptr++; \
        } else { \
            uint32_t re_unit_; \
            c = *(const uint16_t *)cptr; \
            cptr += 2; \
            if (c >= 0xd800 && c < 0xdc00 && \
                cbuf_type == 2 && cptr < cbuf_end) { \
                re_unit_ = *(const uint16_t *)cptr; \
                if (re_unit_ >= 0xdc00 && re_unit_ < 0xe000) { \
                    c = (((c & 0x3ff) << 10) | (re_unit_ & 0x3ff)) + 0x10000; \
                    cptr += 2; \
                } \
            } \
        } \
    } while (0)
|
|
|
|
/* Read the character at 'cptr' into 'c' without modifying 'cptr'.
   An integer variable 'cbuf_type' must be in scope: 0 = 8 bit chars,
   otherwise 16 bit chars; when it is 2, a high surrogate is combined
   with the following low surrogate provided that (cptr + 2) is before
   'cbuf_end'. The caller must ensure that cptr < cbuf_end. */
#define PEEK_CHAR(c, cptr, cbuf_end) \
    do { \
        if (cbuf_type == 0) { \
            c = cptr[0]; \
        } else { \
            uint32_t re_unit_; \
            c = ((const uint16_t *)cptr)[0]; \
            if (c >= 0xd800 && c < 0xdc00 && \
                cbuf_type == 2 && (cptr + 2) < cbuf_end) { \
                re_unit_ = ((const uint16_t *)cptr)[1]; \
                if (re_unit_ >= 0xdc00 && re_unit_ < 0xe000) { \
                    c = (((c & 0x3ff) << 10) | (re_unit_ & 0x3ff)) + 0x10000; \
                } \
            } \
        } \
    } while (0)
|
|
|
|
/* Read the character located just before 'cptr' into 'c' without
   modifying 'cptr'. An integer variable 'cbuf_type' must be in scope:
   0 = 8 bit chars, otherwise 16 bit chars; when it is 2, a low
   surrogate is combined with the preceding high surrogate provided
   that (cptr - 4) is not before 'cbuf_start'. The caller must ensure
   that cptr > cbuf_start. */
#define PEEK_PREV_CHAR(c, cptr, cbuf_start) \
    do { \
        if (cbuf_type == 0) { \
            c = cptr[-1]; \
        } else { \
            uint32_t re_unit_; \
            c = ((const uint16_t *)cptr)[-1]; \
            if (c >= 0xdc00 && c < 0xe000 && \
                cbuf_type == 2 && (cptr - 4) >= cbuf_start) { \
                re_unit_ = ((const uint16_t *)cptr)[-2]; \
                if (re_unit_ >= 0xd800 && re_unit_ < 0xdc00 ) { \
                    c = (((re_unit_ & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
                } \
            } \
        } \
    } while (0)
|
|
|
|
/* Move 'cptr' back by one character and read that character into 'c'.
   An integer variable 'cbuf_type' must be in scope: 0 = 8 bit chars,
   otherwise 16 bit chars; when it is 2, a low surrogate preceded by a
   high surrogate (not before 'cbuf_start') is read as a single code
   point and 'cptr' is moved back over both units. The caller must
   ensure that cptr > cbuf_start. */
#define GET_PREV_CHAR(c, cptr, cbuf_start) \
    do { \
        if (cbuf_type == 0) { \
            cptr--; \
            c = cptr[0]; \
        } else { \
            uint32_t re_unit_; \
            cptr -= 2; \
            c = ((const uint16_t *)cptr)[0]; \
            if (c >= 0xdc00 && c < 0xe000 && \
                cbuf_type == 2 && cptr > cbuf_start) { \
                re_unit_ = ((const uint16_t *)cptr)[-1]; \
                if (re_unit_ >= 0xd800 && re_unit_ < 0xdc00 ) { \
                    cptr -= 2; \
                    c = (((re_unit_ & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
                } \
            } \
        } \
    } while (0)
|
|
|
|
/* Move 'cptr' back by one character without reading its value.
   An integer variable 'cbuf_type' must be in scope: 0 = 8 bit chars,
   otherwise 16 bit chars; when it is 2, a low surrogate preceded by a
   high surrogate (not before 'cbuf_start') is skipped as a single
   character. The caller must ensure that cptr > cbuf_start.
   The macro uses its own temporary, so no variable of the caller
   other than 'cptr' is modified. */
#define PREV_CHAR(cptr, cbuf_start) \
    do { \
        if (cbuf_type == 0) { \
            cptr--; \
        } else { \
            cptr -= 2; \
            if (cbuf_type == 2) { \
                uint32_t re_unit_; \
                re_unit_ = ((const uint16_t *)cptr)[0]; \
                if (re_unit_ >= 0xdc00 && re_unit_ < 0xe000 && \
                    cptr > cbuf_start) { \
                    re_unit_ = ((const uint16_t *)cptr)[-1]; \
                    if (re_unit_ >= 0xd800 && re_unit_ < 0xdc00) \
                        cptr -= 2; \
                } \
            } \
        } \
    } while (0)
|
|
|
|
/* Element of the auxiliary stack of the interpreter: it holds either
   a 32 bit counter (REOP_push_i32) or a position in the input stored
   as an integer (REOP_push_char_pos). */
typedef uintptr_t StackInt;
|
|
|
|
/* Kind of a state saved on the backtracking stack.
   Note: RE_EXEC_STATE_NEGATIVE_LOOKAHEAD must directly follow
   RE_EXEC_STATE_LOOKAHEAD because the interpreter computes the state
   type as RE_EXEC_STATE_LOOKAHEAD + (opcode - REOP_lookahead). */
typedef enum {
    RE_EXEC_STATE_SPLIT = 0,              /* alternative to try on failure */
    RE_EXEC_STATE_LOOKAHEAD = 1,          /* positive lookaround in progress */
    RE_EXEC_STATE_NEGATIVE_LOOKAHEAD = 2, /* negative lookaround in progress */
    RE_EXEC_STATE_GREEDY_QUANT = 3,       /* simple greedy quantifier */
} REExecStateEnum;
|
|
|
|
typedef struct REExecState {
|
|
REExecStateEnum type : 8;
|
|
uint8_t stack_len;
|
|
size_t count; /* only used for RE_EXEC_STATE_GREEDY_QUANT */
|
|
const uint8_t *cptr;
|
|
const uint8_t *pc;
|
|
void *buf[0];
|
|
} REExecState;
|
|
|
|
typedef struct {
|
|
const uint8_t *cbuf;
|
|
const uint8_t *cbuf_end;
|
|
/* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16 */
|
|
int cbuf_type;
|
|
int capture_count;
|
|
int stack_size_max;
|
|
BOOL multi_line;
|
|
BOOL ignore_case;
|
|
BOOL is_utf16;
|
|
void *opaque; /* used for stack overflow check */
|
|
|
|
size_t state_size;
|
|
uint8_t *state_stack;
|
|
size_t state_stack_size;
|
|
size_t state_stack_len;
|
|
} REExecContext;
|
|
|
|
static int push_state(REExecContext *s,
|
|
uint8_t **capture,
|
|
StackInt *stack, size_t stack_len,
|
|
const uint8_t *pc, const uint8_t *cptr,
|
|
REExecStateEnum type, size_t count)
|
|
{
|
|
REExecState *rs;
|
|
uint8_t *new_stack;
|
|
size_t new_size, i, n;
|
|
StackInt *stack_buf;
|
|
|
|
if (UNLIKELY((s->state_stack_len + 1) > s->state_stack_size)) {
|
|
/* reallocate the stack */
|
|
new_size = s->state_stack_size * 3 / 2;
|
|
if (new_size < 8)
|
|
new_size = 8;
|
|
new_stack = lre_realloc(s->opaque, s->state_stack, new_size * s->state_size);
|
|
if (!new_stack)
|
|
return -1;
|
|
s->state_stack_size = new_size;
|
|
s->state_stack = new_stack;
|
|
}
|
|
rs = (REExecState *)(s->state_stack + s->state_stack_len * s->state_size);
|
|
s->state_stack_len++;
|
|
rs->type = type;
|
|
rs->count = count;
|
|
rs->stack_len = stack_len;
|
|
rs->cptr = cptr;
|
|
rs->pc = pc;
|
|
n = 2 * s->capture_count;
|
|
for(i = 0; i < n; i++)
|
|
rs->buf[i] = capture[i];
|
|
stack_buf = (StackInt *)(rs->buf + n);
|
|
for(i = 0; i < stack_len; i++)
|
|
stack_buf[i] = stack[i];
|
|
return 0;
|
|
}
|
|
|
|
/* return 1 if match, 0 if not match or -1 if error. */
|
|
static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
|
StackInt *stack, int stack_len,
|
|
const uint8_t *pc, const uint8_t *cptr,
|
|
BOOL no_recurse)
|
|
{
|
|
int opcode, ret;
|
|
int cbuf_type;
|
|
uint32_t val, c;
|
|
const uint8_t *cbuf_end;
|
|
|
|
cbuf_type = s->cbuf_type;
|
|
cbuf_end = s->cbuf_end;
|
|
|
|
for(;;) {
|
|
// printf("top=%p: pc=%d\n", th_list.top, (int)(pc - (bc_buf + RE_HEADER_LEN)));
|
|
opcode = *pc++;
|
|
switch(opcode) {
|
|
case REOP_match:
|
|
{
|
|
REExecState *rs;
|
|
if (no_recurse)
|
|
return (intptr_t)cptr;
|
|
ret = 1;
|
|
goto recurse;
|
|
no_match:
|
|
if (no_recurse)
|
|
return 0;
|
|
ret = 0;
|
|
recurse:
|
|
for(;;) {
|
|
if (s->state_stack_len == 0)
|
|
return ret;
|
|
rs = (REExecState *)(s->state_stack +
|
|
(s->state_stack_len - 1) * s->state_size);
|
|
if (rs->type == RE_EXEC_STATE_SPLIT) {
|
|
if (!ret) {
|
|
pop_state:
|
|
memcpy(capture, rs->buf,
|
|
sizeof(capture[0]) * 2 * s->capture_count);
|
|
pop_state1:
|
|
pc = rs->pc;
|
|
cptr = rs->cptr;
|
|
stack_len = rs->stack_len;
|
|
memcpy(stack, rs->buf + 2 * s->capture_count,
|
|
stack_len * sizeof(stack[0]));
|
|
s->state_stack_len--;
|
|
break;
|
|
}
|
|
} else if (rs->type == RE_EXEC_STATE_GREEDY_QUANT) {
|
|
if (!ret) {
|
|
uint32_t char_count, i;
|
|
memcpy(capture, rs->buf,
|
|
sizeof(capture[0]) * 2 * s->capture_count);
|
|
stack_len = rs->stack_len;
|
|
memcpy(stack, rs->buf + 2 * s->capture_count,
|
|
stack_len * sizeof(stack[0]));
|
|
pc = rs->pc;
|
|
cptr = rs->cptr;
|
|
/* go backward */
|
|
char_count = get_u32(pc + 12);
|
|
for(i = 0; i < char_count; i++) {
|
|
PREV_CHAR(cptr, s->cbuf);
|
|
}
|
|
pc = (pc + 16) + (int)get_u32(pc);
|
|
rs->cptr = cptr;
|
|
rs->count--;
|
|
if (rs->count == 0) {
|
|
s->state_stack_len--;
|
|
}
|
|
break;
|
|
}
|
|
} else {
|
|
ret = ((rs->type == RE_EXEC_STATE_LOOKAHEAD && ret) ||
|
|
(rs->type == RE_EXEC_STATE_NEGATIVE_LOOKAHEAD && !ret));
|
|
if (ret) {
|
|
/* keep the capture in case of positive lookahead */
|
|
if (rs->type == RE_EXEC_STATE_LOOKAHEAD)
|
|
goto pop_state1;
|
|
else
|
|
goto pop_state;
|
|
}
|
|
}
|
|
s->state_stack_len--;
|
|
}
|
|
}
|
|
break;
|
|
case REOP_char32:
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
goto test_char;
|
|
case REOP_char:
|
|
val = get_u16(pc);
|
|
pc += 2;
|
|
test_char:
|
|
if (cptr >= cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c, cptr, cbuf_end);
|
|
if (s->ignore_case) {
|
|
c = lre_canonicalize(c, s->is_utf16);
|
|
}
|
|
if (val != c)
|
|
goto no_match;
|
|
break;
|
|
case REOP_split_goto_first:
|
|
case REOP_split_next_first:
|
|
{
|
|
const uint8_t *pc1;
|
|
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
if (opcode == REOP_split_next_first) {
|
|
pc1 = pc + (int)val;
|
|
} else {
|
|
pc1 = pc;
|
|
pc = pc + (int)val;
|
|
}
|
|
ret = push_state(s, capture, stack, stack_len,
|
|
pc1, cptr, RE_EXEC_STATE_SPLIT, 0);
|
|
if (ret < 0)
|
|
return -1;
|
|
break;
|
|
}
|
|
case REOP_lookahead:
|
|
case REOP_negative_lookahead:
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
ret = push_state(s, capture, stack, stack_len,
|
|
pc + (int)val, cptr,
|
|
RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead,
|
|
0);
|
|
if (ret < 0)
|
|
return -1;
|
|
break;
|
|
|
|
case REOP_goto:
|
|
val = get_u32(pc);
|
|
pc += 4 + (int)val;
|
|
break;
|
|
case REOP_line_start:
|
|
if (cptr == s->cbuf)
|
|
break;
|
|
if (!s->multi_line)
|
|
goto no_match;
|
|
PEEK_PREV_CHAR(c, cptr, s->cbuf);
|
|
if (!is_line_terminator(c))
|
|
goto no_match;
|
|
break;
|
|
case REOP_line_end:
|
|
if (cptr == cbuf_end)
|
|
break;
|
|
if (!s->multi_line)
|
|
goto no_match;
|
|
PEEK_CHAR(c, cptr, cbuf_end);
|
|
if (!is_line_terminator(c))
|
|
goto no_match;
|
|
break;
|
|
case REOP_dot:
|
|
if (cptr == cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c, cptr, cbuf_end);
|
|
if (is_line_terminator(c))
|
|
goto no_match;
|
|
break;
|
|
case REOP_any:
|
|
if (cptr == cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c, cptr, cbuf_end);
|
|
break;
|
|
case REOP_save_start:
|
|
case REOP_save_end:
|
|
val = *pc++;
|
|
assert(val < s->capture_count);
|
|
capture[2 * val + opcode - REOP_save_start] = (uint8_t *)cptr;
|
|
break;
|
|
case REOP_save_reset:
|
|
{
|
|
uint32_t val2;
|
|
val = pc[0];
|
|
val2 = pc[1];
|
|
pc += 2;
|
|
assert(val2 < s->capture_count);
|
|
while (val <= val2) {
|
|
capture[2 * val] = NULL;
|
|
capture[2 * val + 1] = NULL;
|
|
val++;
|
|
}
|
|
}
|
|
break;
|
|
case REOP_push_i32:
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
stack[stack_len++] = val;
|
|
break;
|
|
case REOP_drop:
|
|
stack_len--;
|
|
break;
|
|
case REOP_loop:
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
if (--stack[stack_len - 1] != 0) {
|
|
pc += (int)val;
|
|
}
|
|
break;
|
|
case REOP_push_char_pos:
|
|
stack[stack_len++] = (uintptr_t)cptr;
|
|
break;
|
|
case REOP_bne_char_pos:
|
|
val = get_u32(pc);
|
|
pc += 4;
|
|
if (stack[--stack_len] != (uintptr_t)cptr)
|
|
pc += (int)val;
|
|
break;
|
|
case REOP_word_boundary:
|
|
case REOP_not_word_boundary:
|
|
{
|
|
BOOL v1, v2;
|
|
/* char before */
|
|
if (cptr == s->cbuf) {
|
|
v1 = FALSE;
|
|
} else {
|
|
PEEK_PREV_CHAR(c, cptr, s->cbuf);
|
|
v1 = is_word_char(c);
|
|
}
|
|
/* current char */
|
|
if (cptr >= cbuf_end) {
|
|
v2 = FALSE;
|
|
} else {
|
|
PEEK_CHAR(c, cptr, cbuf_end);
|
|
v2 = is_word_char(c);
|
|
}
|
|
if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
|
|
goto no_match;
|
|
}
|
|
break;
|
|
case REOP_back_reference:
|
|
case REOP_backward_back_reference:
|
|
{
|
|
const uint8_t *cptr1, *cptr1_end, *cptr1_start;
|
|
uint32_t c1, c2;
|
|
|
|
val = *pc++;
|
|
if (val >= s->capture_count)
|
|
goto no_match;
|
|
cptr1_start = capture[2 * val];
|
|
cptr1_end = capture[2 * val + 1];
|
|
if (!cptr1_start || !cptr1_end)
|
|
break;
|
|
if (opcode == REOP_back_reference) {
|
|
cptr1 = cptr1_start;
|
|
while (cptr1 < cptr1_end) {
|
|
if (cptr >= cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c1, cptr1, cptr1_end);
|
|
GET_CHAR(c2, cptr, cbuf_end);
|
|
if (s->ignore_case) {
|
|
c1 = lre_canonicalize(c1, s->is_utf16);
|
|
c2 = lre_canonicalize(c2, s->is_utf16);
|
|
}
|
|
if (c1 != c2)
|
|
goto no_match;
|
|
}
|
|
} else {
|
|
cptr1 = cptr1_end;
|
|
while (cptr1 > cptr1_start) {
|
|
if (cptr == s->cbuf)
|
|
goto no_match;
|
|
GET_PREV_CHAR(c1, cptr1, cptr1_start);
|
|
GET_PREV_CHAR(c2, cptr, s->cbuf);
|
|
if (s->ignore_case) {
|
|
c1 = lre_canonicalize(c1, s->is_utf16);
|
|
c2 = lre_canonicalize(c2, s->is_utf16);
|
|
}
|
|
if (c1 != c2)
|
|
goto no_match;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case REOP_range:
|
|
{
|
|
int n;
|
|
uint32_t low, high, idx_min, idx_max, idx;
|
|
|
|
n = get_u16(pc); /* n must be >= 1 */
|
|
pc += 2;
|
|
if (cptr >= cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c, cptr, cbuf_end);
|
|
if (s->ignore_case) {
|
|
c = lre_canonicalize(c, s->is_utf16);
|
|
}
|
|
idx_min = 0;
|
|
low = get_u16(pc + 0 * 4);
|
|
if (c < low)
|
|
goto no_match;
|
|
idx_max = n - 1;
|
|
high = get_u16(pc + idx_max * 4 + 2);
|
|
/* 0xffff in for last value means +infinity */
|
|
if (UNLIKELY(c >= 0xffff) && high == 0xffff)
|
|
goto range_match;
|
|
if (c > high)
|
|
goto no_match;
|
|
while (idx_min <= idx_max) {
|
|
idx = (idx_min + idx_max) / 2;
|
|
low = get_u16(pc + idx * 4);
|
|
high = get_u16(pc + idx * 4 + 2);
|
|
if (c < low)
|
|
idx_max = idx - 1;
|
|
else if (c > high)
|
|
idx_min = idx + 1;
|
|
else
|
|
goto range_match;
|
|
}
|
|
goto no_match;
|
|
range_match:
|
|
pc += 4 * n;
|
|
}
|
|
break;
|
|
case REOP_range32:
|
|
{
|
|
int n;
|
|
uint32_t low, high, idx_min, idx_max, idx;
|
|
|
|
n = get_u16(pc); /* n must be >= 1 */
|
|
pc += 2;
|
|
if (cptr >= cbuf_end)
|
|
goto no_match;
|
|
GET_CHAR(c, cptr, cbuf_end);
|
|
if (s->ignore_case) {
|
|
c = lre_canonicalize(c, s->is_utf16);
|
|
}
|
|
idx_min = 0;
|
|
low = get_u32(pc + 0 * 8);
|
|
if (c < low)
|
|
goto no_match;
|
|
idx_max = n - 1;
|
|
high = get_u32(pc + idx_max * 8 + 4);
|
|
if (c > high)
|
|
goto no_match;
|
|
while (idx_min <= idx_max) {
|
|
idx = (idx_min + idx_max) / 2;
|
|
low = get_u32(pc + idx * 8);
|
|
high = get_u32(pc + idx * 8 + 4);
|
|
if (c < low)
|
|
idx_max = idx - 1;
|
|
else if (c > high)
|
|
idx_min = idx + 1;
|
|
else
|
|
goto range32_match;
|
|
}
|
|
goto no_match;
|
|
range32_match:
|
|
pc += 8 * n;
|
|
}
|
|
break;
|
|
case REOP_prev:
|
|
/* go to the previous char */
|
|
if (cptr == s->cbuf)
|
|
goto no_match;
|
|
PREV_CHAR(cptr, s->cbuf);
|
|
break;
|
|
case REOP_simple_greedy_quant:
|
|
{
|
|
uint32_t next_pos, quant_min, quant_max;
|
|
size_t q;
|
|
intptr_t res;
|
|
const uint8_t *pc1;
|
|
|
|
next_pos = get_u32(pc);
|
|
quant_min = get_u32(pc + 4);
|
|
quant_max = get_u32(pc + 8);
|
|
pc += 16;
|
|
pc1 = pc;
|
|
pc += (int)next_pos;
|
|
|
|
q = 0;
|
|
for(;;) {
|
|
res = lre_exec_backtrack(s, capture, stack, stack_len,
|
|
pc1, cptr, TRUE);
|
|
if (res == -1)
|
|
return res;
|
|
if (!res)
|
|
break;
|
|
cptr = (uint8_t *)res;
|
|
q++;
|
|
if (q >= quant_max && quant_max != INT32_MAX)
|
|
break;
|
|
}
|
|
if (q < quant_min)
|
|
goto no_match;
|
|
if (q > quant_min) {
|
|
/* will examine all matches down to quant_min */
|
|
ret = push_state(s, capture, stack, stack_len,
|
|
pc1 - 16, cptr,
|
|
RE_EXEC_STATE_GREEDY_QUANT,
|
|
q - quant_min);
|
|
if (ret < 0)
|
|
return -1;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Return 1 if match, 0 if not match or -1 if error. cindex is the
|
|
starting position of the match and must be such as 0 <= cindex <=
|
|
clen. */
|
|
int lre_exec(uint8_t **capture,
|
|
const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen,
|
|
int cbuf_type, void *opaque)
|
|
{
|
|
REExecContext s_s, *s = &s_s;
|
|
int re_flags, i, alloca_size, ret;
|
|
StackInt *stack_buf;
|
|
|
|
re_flags = bc_buf[RE_HEADER_FLAGS];
|
|
s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0;
|
|
s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0;
|
|
s->is_utf16 = (re_flags & LRE_FLAG_UTF16) != 0;
|
|
s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT];
|
|
s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE];
|
|
s->cbuf = cbuf;
|
|
s->cbuf_end = cbuf + (clen << cbuf_type);
|
|
s->cbuf_type = cbuf_type;
|
|
if (s->cbuf_type == 1 && s->is_utf16)
|
|
s->cbuf_type = 2;
|
|
s->opaque = opaque;
|
|
|
|
s->state_size = sizeof(REExecState) +
|
|
s->capture_count * sizeof(capture[0]) * 2 +
|
|
s->stack_size_max * sizeof(stack_buf[0]);
|
|
s->state_stack = NULL;
|
|
s->state_stack_len = 0;
|
|
s->state_stack_size = 0;
|
|
|
|
for(i = 0; i < s->capture_count * 2; i++)
|
|
capture[i] = NULL;
|
|
alloca_size = s->stack_size_max * sizeof(stack_buf[0]);
|
|
stack_buf = alloca(alloca_size);
|
|
ret = lre_exec_backtrack(s, capture, stack_buf, 0, bc_buf + RE_HEADER_LEN,
|
|
cbuf + (cindex << cbuf_type), FALSE);
|
|
lre_realloc(s->opaque, s->state_stack, 0);
|
|
return ret;
|
|
}
|
|
|
|
int lre_get_capture_count(const uint8_t *bc_buf)
|
|
{
|
|
return bc_buf[RE_HEADER_CAPTURE_COUNT];
|
|
}
|
|
|
|
int lre_get_flags(const uint8_t *bc_buf)
|
|
{
|
|
return bc_buf[RE_HEADER_FLAGS];
|
|
}
|
|
|
|
/* Return NULL if no group names. Otherwise, return a pointer to
|
|
'capture_count - 1' zero terminated UTF-8 strings. */
|
|
const char *lre_get_groupnames(const uint8_t *bc_buf)
|
|
{
|
|
uint32_t re_bytecode_len;
|
|
if ((lre_get_flags(bc_buf) & LRE_FLAG_NAMED_GROUPS) == 0)
|
|
return NULL;
|
|
re_bytecode_len = get_u32(bc_buf + 3);
|
|
return (const char *)(bc_buf + 7 + re_bytecode_len);
|
|
}
|
|
|
|
#ifdef TEST
|
|
|
|
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)
|
|
{
|
|
return FALSE;
|
|
}
|
|
|
|
/* Allocator hook for the standalone TEST build, backed by the C
   library heap.

   opaque: user context, unused here.
   ptr:    block to resize or release, may be NULL.
   size:   requested size in bytes.

   A size of 0 releases 'ptr' and returns NULL. This case is handled
   explicitly because lre_exec() releases its state stack with
   lre_realloc(opaque, state_stack, 0), possibly with a NULL pointer,
   and the result of realloc(ptr, 0) is not portable: depending on the
   C library it may return a live allocation that would never be
   freed.

   For a non zero size the result of realloc() is returned, i.e. NULL
   on allocation failure with 'ptr' left untouched. */
void *lre_realloc(void *opaque, void *ptr, size_t size)
{
    (void)opaque;
    if (size == 0) {
        free(ptr);
        return NULL;
    }
    return realloc(ptr, size);
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
int len, ret, i;
|
|
uint8_t *bc;
|
|
char error_msg[64];
|
|
uint8_t *capture[CAPTURE_COUNT_MAX * 2];
|
|
const char *input;
|
|
int input_len, capture_count;
|
|
|
|
if (argc < 3) {
|
|
printf("usage: %s regexp input\n", argv[0]);
|
|
exit(1);
|
|
}
|
|
bc = lre_compile(&len, error_msg, sizeof(error_msg), argv[1],
|
|
strlen(argv[1]), 0, NULL);
|
|
if (!bc) {
|
|
fprintf(stderr, "error: %s\n", error_msg);
|
|
exit(1);
|
|
}
|
|
|
|
input = argv[2];
|
|
input_len = strlen(input);
|
|
|
|
ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL);
|
|
printf("ret=%d\n", ret);
|
|
if (ret == 1) {
|
|
capture_count = lre_get_capture_count(bc);
|
|
for(i = 0; i < 2 * capture_count; i++) {
|
|
uint8_t *ptr;
|
|
ptr = capture[i];
|
|
printf("%d: ", i);
|
|
if (!ptr)
|
|
printf("<nil>");
|
|
else
|
|
printf("%u", (int)(ptr - (uint8_t *)input));
|
|
printf("\n");
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|