/*
 * QuickJS Javascript Engine
 *
 * Copyright (c) 2017-2021 Fabrice Bellard
 * Copyright (c) 2017-2021 Charlie Gordon
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "third_party/quickjs/internal.h"
#include "third_party/quickjs/libregexp.h"
#include "third_party/quickjs/quickjs.h"

asm(".ident\t\"\\n\\n\
QuickJS (MIT License)\\n\
Copyright (c) 2017-2021 Fabrice Bellard\\n\
Copyright (c) 2017-2021 Charlie Gordon\"");
asm(".include \"libc/disclaimer.inc\"");
/* clang-format off */

/* Class finalizer for RegExp objects: drops the references to the
   bytecode string and the pattern string stored in the object. */
void js_regexp_finalizer(JSRuntime *rt, JSValue val)
{
    JSObject *p = JS_VALUE_GET_OBJ(val);
    JSRegExp *re = &p->u.regexp;
    JS_FreeValueRT(rt, JS_MKPTR(JS_TAG_STRING, re->bytecode));
    JS_FreeValueRT(rt, JS_MKPTR(JS_TAG_STRING, re->pattern));
}

/* create a string containing the RegExp bytecode.

   'flags' may be undefined (no flags); otherwise it is converted to a
   C string and each character must be one of "gimsuy", at most once.
   An unknown or repeated flag, or a pattern rejected by lre_compile(),
   throws a SyntaxError and returns JS_EXCEPTION. Neither argument is
   freed. */
JSValue js_compile_regexp(JSContext *ctx, JSValueConst pattern,
                          JSValueConst flags)
{
    const char *str;
    int re_flags, mask;
    uint8_t *re_bytecode_buf;
    size_t i, len;
    int re_bytecode_len;
    JSValue ret;
    char error_msg[64];

    re_flags = 0;
    if (!JS_IsUndefined(flags)) {
        str = JS_ToCStringLen(ctx, &len, flags);
        if (!str)
            return JS_EXCEPTION;
        /* XXX: re_flags = LRE_FLAG_OCTAL unless strict mode? */
        for (i = 0; i < len; i++) {
            switch(str[i]) {
            case 'g':
                mask = LRE_FLAG_GLOBAL;
                break;
            case 'i':
                mask = LRE_FLAG_IGNORECASE;
                break;
            case 'm':
                mask = LRE_FLAG_MULTILINE;
                break;
            case 's':
                mask = LRE_FLAG_DOTALL;
                break;
            case 'u':
                mask = LRE_FLAG_UTF16;
                break;
            case 'y':
                mask = LRE_FLAG_STICKY;
                break;
            default:
                goto bad_flags;
            }
            /* a flag that is already set has been given twice */
            if ((re_flags & mask) != 0) {
            bad_flags:
                JS_FreeCString(ctx, str);
                return JS_ThrowSyntaxError(ctx, "invalid regular expression flags");
            }
            re_flags |= mask;
        }
        JS_FreeCString(ctx, str);
    }

    str = JS_ToCStringLen2(ctx, &len, pattern, !(re_flags & LRE_FLAG_UTF16));
    if (!str)
        return JS_EXCEPTION;
    re_bytecode_buf = lre_compile(&re_bytecode_len, error_msg,
                                  sizeof(error_msg), str, len, re_flags, ctx);
    JS_FreeCString(ctx, str);
    if (!re_bytecode_buf) {
        JS_ThrowSyntaxError(ctx, "%s", error_msg);
        return JS_EXCEPTION;
    }

    /* the bytecode is copied into a JS string, so the buffer can go */
    ret = js_new_string8(ctx, re_bytecode_buf, re_bytecode_len);
    js_free(ctx, re_bytecode_buf);
    return ret;
}

/* create a RegExp object from a string containing the RegExp bytecode
   and the source pattern.

   'pattern' and 'bc' are consumed: on success their string pointers
   are stored in the new object, on failure they are freed here. */
JSValue js_regexp_constructor_internal(JSContext *ctx, JSValueConst ctor,
                                       JSValue pattern, JSValue bc)
{
    JSValue obj;
    JSObject *p;
    JSRegExp *re;

    /* sanity check */
    if (JS_VALUE_GET_TAG(bc) != JS_TAG_STRING ||
        JS_VALUE_GET_TAG(pattern) != JS_TAG_STRING) {
        JS_ThrowTypeError(ctx, "string expected");
    fail:
        JS_FreeValue(ctx, bc);
        JS_FreeValue(ctx, pattern);
        return JS_EXCEPTION;
    }

    obj = js_create_from_ctor(ctx, ctor, JS_CLASS_REGEXP);
    if (JS_IsException(obj))
        goto fail;
    p = JS_VALUE_GET_OBJ(obj);
    re = &p->u.regexp;
    re->pattern = JS_VALUE_GET_STRING(pattern);
    re->bytecode = JS_VALUE_GET_STRING(bc);
    JS_DefinePropertyValue(ctx, obj, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0),
                           JS_PROP_WRITABLE);
    return obj;
}

/* Return the JSRegExp payload of 'obj' when it is an object of class
   JS_CLASS_REGEXP, otherwise NULL. A TypeError is thrown in the NULL
   case only when 'throw_error' is set. */
JSRegExp *js_get_regexp(JSContext *ctx, JSValueConst obj, BOOL throw_error)
{
    if (JS_VALUE_GET_TAG(obj) == JS_TAG_OBJECT) {
        JSObject *p = JS_VALUE_GET_OBJ(obj);
        if (p->class_id == JS_CLASS_REGEXP)
            return &p->u.regexp;
    }
    if (throw_error) {
        JS_ThrowTypeErrorInvalidClass(ctx, JS_CLASS_REGEXP);
    }
    return NULL;
}

/* return < 0 if exception or TRUE/FALSE.

   A non-object is never a regexp. For objects, a defined Symbol.match
   property decides (converted to boolean); otherwise the class of the
   object is checked. */
int js_is_regexp(JSContext *ctx, JSValueConst obj)
{
    JSValue m;

    if (!JS_IsObject(obj))
        return FALSE;
    m = JS_GetProperty(ctx, obj, JS_ATOM_Symbol_match);
    if (JS_IsException(m))
        return -1;
    if (!JS_IsUndefined(m))
        return JS_ToBoolFree(ctx, m);
    return js_get_regexp(ctx, obj, FALSE) != NULL;
}

/* RegExp(pattern, flags) constructor, also callable as a function.

   argv[0] is the pattern (string, RegExp object, or regexp-like
   object), argv[1] the flags. Returns the new object, the pattern
   itself in the "called as a function on a RegExp with the same
   constructor and no flags" case, or JS_EXCEPTION. */
JSValue js_regexp_constructor(JSContext *ctx, JSValueConst new_target,
                              int argc, JSValueConst *argv)
{
    JSValue pattern, flags, bc, val;
    JSValueConst pat, flags1;
    JSRegExp *re;
    int pat_is_regexp;

    pat = argv[0];
    flags1 = argv[1];
    pat_is_regexp = js_is_regexp(ctx, pat);
    if (pat_is_regexp < 0)
        return JS_EXCEPTION;
    if (JS_IsUndefined(new_target)) {
        /* called as a function */
        new_target = JS_GetActiveFunction(ctx);
        if (pat_is_regexp && JS_IsUndefined(flags1)) {
            JSValue ctor;
            BOOL res;
            ctor = JS_GetProperty(ctx, pat, JS_ATOM_constructor);
            if (JS_IsException(ctor))
                return ctor;
            res = js_same_value(ctx, ctor, new_target);
            JS_FreeValue(ctx, ctor);
            if (res)
                return JS_DupValue(ctx, pat);
        }
    }
    re = js_get_regexp(ctx, pat, FALSE);
    if (re) {
        /* genuine RegExp object: reuse its stored pattern, and its
           bytecode too when no new flags are requested */
        pattern = JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re->pattern));
        if (JS_IsUndefined(flags1)) {
            bc = JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re->bytecode));
            goto no_compilation;
        } else {
            flags = JS_ToString(ctx, flags1);
            if (JS_IsException(flags))
                goto fail;
        }
    } else {
        flags = JS_UNDEFINED;
        if (pat_is_regexp) {
            /* regexp-like object: go through its 'source' and 'flags'
               properties */
            pattern = JS_GetProperty(ctx, pat, JS_ATOM_source);
            if (JS_IsException(pattern))
                goto fail;
            if (JS_IsUndefined(flags1)) {
                flags = JS_GetProperty(ctx, pat, JS_ATOM_flags);
                if (JS_IsException(flags))
                    goto fail;
            } else {
                flags = JS_DupValue(ctx, flags1);
            }
        } else {
            pattern = JS_DupValue(ctx, pat);
            flags = JS_DupValue(ctx, flags1);
        }
        if (JS_IsUndefined(pattern)) {
            pattern = JS_AtomToString(ctx, JS_ATOM_empty_string);
        } else {
            val = pattern;
            pattern = JS_ToString(ctx, val);
            JS_FreeValue(ctx, val);
            if (JS_IsException(pattern))
                goto fail;
        }
    }
    bc = js_compile_regexp(ctx, pattern, flags);
    if (JS_IsException(bc))
        goto fail;
    JS_FreeValue(ctx, flags);
 no_compilation:
    /* 'pattern' and 'bc' are consumed by the internal constructor */
    return js_regexp_constructor_internal(ctx, new_target, pattern, bc);
 fail:
    JS_FreeValue(ctx, pattern);
    JS_FreeValue(ctx, flags);
    return JS_EXCEPTION;
}

/* compile(pattern, flags) method: replaces the pattern and bytecode of
   the RegExp 'this_val' in place and sets its lastIndex to 0.

   When argv[0] is itself a RegExp object its pattern and bytecode are
   shared and argv[1] must be undefined. Returns a new reference to
   'this_val', or JS_EXCEPTION. */
JSValue js_regexp_compile(JSContext *ctx, JSValueConst this_val,
                          int argc, JSValueConst *argv)
{
    JSRegExp *re1, *re;
    JSValueConst pattern1, flags1;
    JSValue bc, pattern;

    re = js_get_regexp(ctx, this_val, TRUE);
    if (!re)
        return JS_EXCEPTION;
    pattern1 = argv[0];
    flags1 = argv[1];
    re1 = js_get_regexp(ctx, pattern1, FALSE);
    if (re1) {
        if (!JS_IsUndefined(flags1))
            return JS_ThrowTypeError(ctx, "flags must be undefined");
        pattern = JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re1->pattern));
        bc = JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re1->bytecode));
    } else {
        bc = JS_UNDEFINED;
        if (JS_IsUndefined(pattern1))
            pattern = JS_AtomToString(ctx, JS_ATOM_empty_string);
        else
            pattern = JS_ToString(ctx, pattern1);
        if (JS_IsException(pattern))
            goto fail;
        bc = js_compile_regexp(ctx, pattern, flags1);
        if (JS_IsException(bc))
            goto fail;
    }
    /* release the previous strings only once the new ones are ready */
    JS_FreeValue(ctx, JS_MKPTR(JS_TAG_STRING, re->pattern));
    JS_FreeValue(ctx, JS_MKPTR(JS_TAG_STRING, re->bytecode));
    re->pattern = JS_VALUE_GET_STRING(pattern);
    re->bytecode = JS_VALUE_GET_STRING(bc);
    if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex,
                       JS_NewInt32(ctx, 0)) < 0)
        return JS_EXCEPTION;
    return JS_DupValue(ctx, this_val);
 fail:
    JS_FreeValue(ctx, pattern);
    JS_FreeValue(ctx, bc);
    return JS_EXCEPTION;
}

#if 0
static JSValue js_regexp_get___source(JSContext *ctx, JSValueConst this_val)
{
    JSRegExp *re = js_get_regexp(ctx, this_val, TRUE);
    if (!re)
        return JS_EXCEPTION;
    return JS_DupValue(ctx, JS_MKPTR(JS_TAG_STRING, re->pattern));
}

static JSValue js_regexp_get___flags(JSContext *ctx, JSValueConst this_val)
{
    JSRegExp *re = js_get_regexp(ctx, this_val, TRUE);
    int flags;

    if (!re)
        return JS_EXCEPTION;
    flags = lre_get_flags(re->bytecode->u.str8);
    return JS_NewInt32(ctx, flags);
}
#endif
JSValue js_regexp_get_source(JSContext *ctx, JSValueConst this_val) { JSRegExp *re; JSString *p; StringBuffer b_s, *b = &b_s; int i, n, c, c2, bra; if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT) return JS_ThrowTypeErrorNotAnObject(ctx); if (js_same_value(ctx, this_val, ctx->class_proto[JS_CLASS_REGEXP])) goto empty_regex; re = js_get_regexp(ctx, this_val, TRUE); if (!re) return JS_EXCEPTION; p = re->pattern; if (p->len == 0) { empty_regex: return JS_NewString(ctx, "(?:)"); } string_buffer_init2(ctx, b, p->len, p->is_wide_char); /* Escape '/' and newline sequences as needed */ bra = 0; for (i = 0, n = p->len; i < n;) { c2 = -1; switch (c = string_get(p, i++)) { case '\\': if (i < n) c2 = string_get(p, i++); break; case ']': bra = 0; break; case '[': if (!bra) { if (i < n && string_get(p, i) == ']') c2 = string_get(p, i++); bra = 1; } break; case '\n': c = '\\'; c2 = 'n'; break; case '\r': c = '\\'; c2 = 'r'; break; case '/': if (!bra) { c = '\\'; c2 = '/'; } break; } string_buffer_putc16(b, c); if (c2 >= 0) string_buffer_putc16(b, c2); } return string_buffer_end(b); } JSValue js_regexp_get_flag(JSContext *ctx, JSValueConst this_val, int mask) { JSRegExp *re; int flags; if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT) return JS_ThrowTypeErrorNotAnObject(ctx); re = js_get_regexp(ctx, this_val, FALSE); if (!re) { if (js_same_value(ctx, this_val, ctx->class_proto[JS_CLASS_REGEXP])) return JS_UNDEFINED; else return JS_ThrowTypeErrorInvalidClass(ctx, JS_CLASS_REGEXP); } flags = lre_get_flags(re->bytecode->u.str8); return JS_NewBool(ctx, (flags & mask) != 0); } JSValue js_regexp_get_flags(JSContext *ctx, JSValueConst this_val) { char str[8], *p = str; int res; if (JS_VALUE_GET_TAG(this_val) != JS_TAG_OBJECT) return JS_ThrowTypeErrorNotAnObject(ctx); res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_global)); if (res < 0) goto exception; if (res) *p++ = 'g'; res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "ignoreCase")); if (res < 0) goto exception; 
if (res) *p++ = 'i'; res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "multiline")); if (res < 0) goto exception; if (res) *p++ = 'm'; res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "dotAll")); if (res < 0) goto exception; if (res) *p++ = 's'; res = JS_ToBoolFree(ctx, JS_GetProperty(ctx, this_val, JS_ATOM_unicode)); if (res < 0) goto exception; if (res) *p++ = 'u'; res = JS_ToBoolFree(ctx, JS_GetPropertyStr(ctx, this_val, "sticky")); if (res < 0) goto exception; if (res) *p++ = 'y'; return JS_NewStringLen(ctx, str, p - str); exception: return JS_EXCEPTION; } JSValue js_regexp_toString(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValue pattern, flags; StringBuffer b_s, *b = &b_s; if (!JS_IsObject(this_val)) return JS_ThrowTypeErrorNotAnObject(ctx); string_buffer_init(ctx, b, 0); string_buffer_putc8(b, '/'); pattern = JS_GetProperty(ctx, this_val, JS_ATOM_source); if (string_buffer_concat_value_free(b, pattern)) goto fail; string_buffer_putc8(b, '/'); flags = JS_GetProperty(ctx, this_val, JS_ATOM_flags); if (string_buffer_concat_value_free(b, flags)) goto fail; return string_buffer_end(b); fail: string_buffer_free(b); return JS_EXCEPTION; } void js_regexp_string_iterator_finalizer(JSRuntime *rt, JSValue val) { JSObject *p = JS_VALUE_GET_OBJ(val); JSRegExpStringIteratorData *it = p->u.regexp_string_iterator_data; if (it) { JS_FreeValueRT(rt, it->iterating_regexp); JS_FreeValueRT(rt, it->iterated_string); js_free_rt(rt, it); } } void js_regexp_string_iterator_mark(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func) { JSObject *p = JS_VALUE_GET_OBJ(val); JSRegExpStringIteratorData *it = p->u.regexp_string_iterator_data; if (it) { JS_MarkValue(rt, it->iterating_regexp, mark_func); JS_MarkValue(rt, it->iterated_string, mark_func); } } JSValue js_regexp_string_iterator_next(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, BOOL *pdone, int magic) { JSRegExpStringIteratorData *it; JSValueConst R, S; 
JSValue matchStr = JS_UNDEFINED, match = JS_UNDEFINED; JSString *sp; it = JS_GetOpaque2(ctx, this_val, JS_CLASS_REGEXP_STRING_ITERATOR); if (!it) goto exception; if (it->done) { *pdone = TRUE; return JS_UNDEFINED; } R = it->iterating_regexp; S = it->iterated_string; match = JS_RegExpExec(ctx, R, S); if (JS_IsException(match)) goto exception; if (JS_IsNull(match)) { it->done = TRUE; *pdone = TRUE; return JS_UNDEFINED; } else if (it->global) { matchStr = JS_ToStringFree(ctx, JS_GetPropertyInt64(ctx, match, 0)); if (JS_IsException(matchStr)) goto exception; if (JS_IsEmptyString(matchStr)) { int64_t thisIndex, nextIndex; if (JS_ToLengthFree(ctx, &thisIndex, JS_GetProperty(ctx, R, JS_ATOM_lastIndex)) < 0) goto exception; sp = JS_VALUE_GET_STRING(S); nextIndex = string_advance_index(sp, thisIndex, it->unicode); if (JS_SetProperty(ctx, R, JS_ATOM_lastIndex, JS_NewInt64(ctx, nextIndex)) < 0) goto exception; } JS_FreeValue(ctx, matchStr); } else { it->done = TRUE; } *pdone = FALSE; return match; exception: JS_FreeValue(ctx, match); JS_FreeValue(ctx, matchStr); *pdone = FALSE; return JS_EXCEPTION; } JSValue js_regexp_Symbol_matchAll(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { // [Symbol.matchAll](str) JSValueConst R = this_val; JSValue S, C, flags, matcher, iter; JSValueConst args[2]; JSString *strp; int64_t lastIndex; JSRegExpStringIteratorData *it; if (!JS_IsObject(R)) return JS_ThrowTypeErrorNotAnObject(ctx); C = JS_UNDEFINED; flags = JS_UNDEFINED; matcher = JS_UNDEFINED; iter = JS_UNDEFINED; S = JS_ToString(ctx, argv[0]); if (JS_IsException(S)) goto exception; C = JS_SpeciesConstructor(ctx, R, ctx->regexp_ctor); if (JS_IsException(C)) goto exception; flags = JS_ToStringFree(ctx, JS_GetProperty(ctx, R, JS_ATOM_flags)); if (JS_IsException(flags)) goto exception; args[0] = R; args[1] = flags; matcher = JS_CallConstructor(ctx, C, 2, args); if (JS_IsException(matcher)) goto exception; if (JS_ToLengthFree(ctx, &lastIndex, JS_GetProperty(ctx, R, 
JS_ATOM_lastIndex))) goto exception; if (JS_SetProperty(ctx, matcher, JS_ATOM_lastIndex, JS_NewInt64(ctx, lastIndex)) < 0) goto exception; iter = JS_NewObjectClass(ctx, JS_CLASS_REGEXP_STRING_ITERATOR); if (JS_IsException(iter)) goto exception; it = js_malloc(ctx, sizeof(*it)); if (!it) goto exception; it->iterating_regexp = matcher; it->iterated_string = S; strp = JS_VALUE_GET_STRING(flags); it->global = string_indexof_char(strp, 'g', 0) >= 0; it->unicode = string_indexof_char(strp, 'u', 0) >= 0; it->done = FALSE; JS_SetOpaque(iter, it); JS_FreeValue(ctx, C); JS_FreeValue(ctx, flags); return iter; exception: JS_FreeValue(ctx, S); JS_FreeValue(ctx, C); JS_FreeValue(ctx, flags); JS_FreeValue(ctx, matcher); JS_FreeValue(ctx, iter); return JS_EXCEPTION; } JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); JSString *str; JSValue str_val, obj, val, groups = JS_UNDEFINED; uint8_t *re_bytecode; int ret; uint8_t **capture, *str_buf; int capture_count, shift, i, re_flags; int64_t last_index; const char *group_name_ptr; if (!re) return JS_EXCEPTION; str_val = JS_ToString(ctx, argv[0]); if (JS_IsException(str_val)) return str_val; val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val)) { JS_FreeValue(ctx, str_val); return JS_EXCEPTION; } re_bytecode = re->bytecode->u.str8; re_flags = lre_get_flags(re_bytecode); if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) { last_index = 0; } str = JS_VALUE_GET_STRING(str_val); capture_count = lre_get_capture_count(re_bytecode); capture = NULL; if (capture_count > 0) { capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2); if (!capture) { JS_FreeValue(ctx, str_val); return JS_EXCEPTION; } } shift = str->is_wide_char; str_buf = str->u.str8; if (last_index > str->len) { ret = 2; } else { ret = lre_exec(capture, re_bytecode, str_buf, last_index, str->len, shift, 
ctx); } obj = JS_NULL; if (ret != 1) { if (ret >= 0) { if (ret == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto fail; } } else { JS_ThrowInternalError(ctx, "out of memory in regexp execution"); goto fail; } JS_FreeValue(ctx, str_val); } else { int prop_flags; if (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) { if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, JS_NewInt32(ctx, (capture[1] - str_buf) >> shift)) < 0) goto fail; } obj = JS_NewArray(ctx); if (JS_IsException(obj)) goto fail; prop_flags = JS_PROP_C_W_E | JS_PROP_THROW; group_name_ptr = lre_get_groupnames(re_bytecode); if (group_name_ptr) { groups = JS_NewObjectProto(ctx, JS_NULL); if (JS_IsException(groups)) goto fail; } for(i = 0; i < capture_count; i++) { int start, end; JSValue val; if (capture[2 * i] == NULL || capture[2 * i + 1] == NULL) { val = JS_UNDEFINED; } else { start = (capture[2 * i] - str_buf) >> shift; end = (capture[2 * i + 1] - str_buf) >> shift; val = js_sub_string(ctx, str, start, end); if (JS_IsException(val)) goto fail; } if (group_name_ptr && i > 0) { if (*group_name_ptr) { if (JS_DefinePropertyValueStr(ctx, groups, group_name_ptr, JS_DupValue(ctx, val), prop_flags) < 0) { JS_FreeValue(ctx, val); goto fail; } } group_name_ptr += strlen(group_name_ptr) + 1; } if (JS_DefinePropertyValueUint32(ctx, obj, i, val, prop_flags) < 0) goto fail; } if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_groups, groups, prop_flags) < 0) goto fail; if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_index, JS_NewInt32(ctx, (capture[0] - str_buf) >> shift), prop_flags) < 0) goto fail; if (JS_DefinePropertyValue(ctx, obj, JS_ATOM_input, str_val, prop_flags) < 0) goto fail1; } js_free(ctx, capture); return obj; fail: JS_FreeValue(ctx, groups); JS_FreeValue(ctx, str_val); fail1: JS_FreeValue(ctx, obj); js_free(ctx, capture); return JS_EXCEPTION; } JSValue JS_RegExpExec(JSContext *ctx, JSValueConst r, JSValueConst s) { 
JSValue method, ret; method = JS_GetProperty(ctx, r, JS_ATOM_exec); if (JS_IsException(method)) return method; if (JS_IsFunction(ctx, method)) { ret = JS_CallFree(ctx, method, r, 1, &s); if (JS_IsException(ret)) return ret; if (!JS_IsObject(ret) && !JS_IsNull(ret)) { JS_FreeValue(ctx, ret); return JS_ThrowTypeError(ctx, "RegExp exec method must return an object or null"); } return ret; } JS_FreeValue(ctx, method); return js_regexp_exec(ctx, r, 1, &s); } BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size) { JSContext *ctx = opaque; return js_check_stack_overflow(ctx->rt, alloca_size); } void *lre_realloc(void *opaque, void *ptr, size_t size) { JSContext *ctx = opaque; /* No JS exception is raised here */ return js_realloc_rt(ctx->rt, ptr, size); } /* delete portions of a string that match a given regex */ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueConst arg) { JSRegExp *re = js_get_regexp(ctx, this_val, TRUE); JSString *str; JSValue str_val, val; uint8_t *re_bytecode; int ret; uint8_t **capture, *str_buf; int capture_count, shift, re_flags; int next_src_pos, start, end; int64_t last_index; StringBuffer b_s, *b = &b_s; if (!re) return JS_EXCEPTION; string_buffer_init(ctx, b, 0); capture = NULL; str_val = JS_ToString(ctx, arg); if (JS_IsException(str_val)) goto fail; str = JS_VALUE_GET_STRING(str_val); re_bytecode = re->bytecode->u.str8; re_flags = lre_get_flags(re_bytecode); if ((re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY)) == 0) { last_index = 0; } else { val = JS_GetProperty(ctx, this_val, JS_ATOM_lastIndex); if (JS_IsException(val) || JS_ToLengthFree(ctx, &last_index, val)) goto fail; } capture_count = lre_get_capture_count(re_bytecode); if (capture_count > 0) { capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2); if (!capture) goto fail; } shift = str->is_wide_char; str_buf = str->u.str8; next_src_pos = 0; for (;;) { if (last_index > str->len) break; ret = lre_exec(capture, re_bytecode, str_buf, 
last_index, str->len, shift, ctx); if (ret != 1) { if (ret >= 0) { if (ret == 2 || (re_flags & (LRE_FLAG_GLOBAL | LRE_FLAG_STICKY))) { if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto fail; } } else { JS_ThrowInternalError(ctx, "out of memory in regexp execution"); goto fail; } break; } start = (capture[0] - str_buf) >> shift; end = (capture[1] - str_buf) >> shift; last_index = end; if (next_src_pos < start) { if (string_buffer_concat(b, str, next_src_pos, start)) goto fail; } next_src_pos = end; if (!(re_flags & LRE_FLAG_GLOBAL)) { if (JS_SetProperty(ctx, this_val, JS_ATOM_lastIndex, JS_NewInt32(ctx, end)) < 0) goto fail; break; } if (end == start) { if (!(re_flags & LRE_FLAG_UTF16) || (unsigned)end >= str->len || !str->is_wide_char) { end++; } else { string_getc(str, &end); } } last_index = end; } if (string_buffer_concat(b, str, next_src_pos, str->len)) goto fail; JS_FreeValue(ctx, str_val); js_free(ctx, capture); return string_buffer_end(b); fail: JS_FreeValue(ctx, str_val); js_free(ctx, capture); string_buffer_free(b); return JS_EXCEPTION; } #if 0 static JSValue js_regexp___RegExpExec(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { return JS_RegExpExec(ctx, argv[0], argv[1]); } static JSValue js_regexp___RegExpDelete(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { return JS_RegExpDelete(ctx, argv[0], argv[1]); } #endif static JSValue js_regexp_test(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValue val; BOOL ret; val = JS_RegExpExec(ctx, this_val, argv[0]); if (JS_IsException(val)) return JS_EXCEPTION; ret = !JS_IsNull(val); JS_FreeValue(ctx, val); return JS_NewBool(ctx, ret); } static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { // [Symbol.match](str) JSValueConst rx = this_val; JSValue A, S, result, matchStr; int global, n, fullUnicode, isEmpty; JSString *p; if (!JS_IsObject(rx)) return 
JS_ThrowTypeErrorNotAnObject(ctx); A = JS_UNDEFINED; result = JS_UNDEFINED; matchStr = JS_UNDEFINED; S = JS_ToString(ctx, argv[0]); if (JS_IsException(S)) goto exception; global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global)); if (global < 0) goto exception; if (!global) { A = JS_RegExpExec(ctx, rx, S); } else { fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode)); if (fullUnicode < 0) goto exception; if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto exception; A = JS_NewArray(ctx); if (JS_IsException(A)) goto exception; n = 0; for(;;) { JS_FreeValue(ctx, result); result = JS_RegExpExec(ctx, rx, S); if (JS_IsException(result)) goto exception; if (JS_IsNull(result)) break; matchStr = JS_ToStringFree(ctx, JS_GetPropertyInt64(ctx, result, 0)); if (JS_IsException(matchStr)) goto exception; isEmpty = JS_IsEmptyString(matchStr); if (JS_SetPropertyInt64(ctx, A, n++, matchStr) < 0) goto exception; if (isEmpty) { int64_t thisIndex, nextIndex; if (JS_ToLengthFree(ctx, &thisIndex, JS_GetProperty(ctx, rx, JS_ATOM_lastIndex)) < 0) goto exception; p = JS_VALUE_GET_STRING(S); nextIndex = string_advance_index(p, thisIndex, fullUnicode); if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt64(ctx, nextIndex)) < 0) goto exception; } } if (n == 0) { JS_FreeValue(ctx, A); A = JS_NULL; } } JS_FreeValue(ctx, result); JS_FreeValue(ctx, S); return A; exception: JS_FreeValue(ctx, A); JS_FreeValue(ctx, result); JS_FreeValue(ctx, S); return JS_EXCEPTION; } typedef struct ValueBuffer { JSContext *ctx; JSValue *arr; JSValue def[4]; int len; int size; int error_status; } ValueBuffer; static int value_buffer_init(JSContext *ctx, ValueBuffer *b) { b->ctx = ctx; b->len = 0; b->size = 4; b->error_status = 0; b->arr = b->def; return 0; } static void value_buffer_free(ValueBuffer *b) { while (b->len > 0) JS_FreeValue(b->ctx, b->arr[--b->len]); if (b->arr != b->def) js_free(b->ctx, b->arr); b->arr = b->def; b->size = 4; } static int 
value_buffer_append(ValueBuffer *b, JSValue val) { if (b->error_status) return -1; if (b->len >= b->size) { int new_size = (b->len + (b->len >> 1) + 31) & ~16; size_t slack; JSValue *new_arr; if (b->arr == b->def) { new_arr = js_realloc2(b->ctx, NULL, sizeof(*b->arr) * new_size, &slack); if (new_arr) memcpy(new_arr, b->def, sizeof b->def); } else { new_arr = js_realloc2(b->ctx, b->arr, sizeof(*b->arr) * new_size, &slack); } if (!new_arr) { value_buffer_free(b); JS_FreeValue(b->ctx, val); b->error_status = -1; return -1; } new_size += slack / sizeof(*new_arr); b->arr = new_arr; b->size = new_size; } b->arr[b->len++] = val; return 0; } static int js_is_standard_regexp(JSContext *ctx, JSValueConst rx) { JSValue val; int res; val = JS_GetProperty(ctx, rx, JS_ATOM_constructor); if (JS_IsException(val)) return -1; // rx.constructor === RegExp res = js_same_value(ctx, val, ctx->regexp_ctor); JS_FreeValue(ctx, val); if (res) { val = JS_GetProperty(ctx, rx, JS_ATOM_exec); if (JS_IsException(val)) return -1; // rx.exec === RE_exec res = JS_IsCFunction(ctx, val, js_regexp_exec, 0); JS_FreeValue(ctx, val); } return res; } static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { // [Symbol.replace](str, rep) JSValueConst rx = this_val, rep = argv[1]; JSValueConst args[6]; JSValue str, rep_val, matched, tab, rep_str, namedCaptures, res; JSString *sp, *rp; StringBuffer b_s, *b = &b_s; ValueBuffer v_b, *results = &v_b; int nextSourcePosition, n, j, functionalReplace, is_global, fullUnicode; uint32_t nCaptures; int64_t position; if (!JS_IsObject(rx)) return JS_ThrowTypeErrorNotAnObject(ctx); string_buffer_init(ctx, b, 0); value_buffer_init(ctx, results); rep_val = JS_UNDEFINED; matched = JS_UNDEFINED; tab = JS_UNDEFINED; rep_str = JS_UNDEFINED; namedCaptures = JS_UNDEFINED; str = JS_ToString(ctx, argv[0]); if (JS_IsException(str)) goto exception; sp = JS_VALUE_GET_STRING(str); rp = NULL; functionalReplace = JS_IsFunction(ctx, 
rep); if (!functionalReplace) { rep_val = JS_ToString(ctx, rep); if (JS_IsException(rep_val)) goto exception; rp = JS_VALUE_GET_STRING(rep_val); } fullUnicode = 0; is_global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global)); if (is_global < 0) goto exception; if (is_global) { fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode)); if (fullUnicode < 0) goto exception; if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) goto exception; } if (rp && rp->len == 0 && is_global && js_is_standard_regexp(ctx, rx)) { /* use faster version for simple cases */ res = JS_RegExpDelete(ctx, rx, str); goto done; } for(;;) { JSValue result; result = JS_RegExpExec(ctx, rx, str); if (JS_IsException(result)) goto exception; if (JS_IsNull(result)) break; if (value_buffer_append(results, result) < 0) goto exception; if (!is_global) break; JS_FreeValue(ctx, matched); matched = JS_ToStringFree(ctx, JS_GetPropertyInt64(ctx, result, 0)); if (JS_IsException(matched)) goto exception; if (JS_IsEmptyString(matched)) { /* always advance of at least one char */ int64_t thisIndex, nextIndex; if (JS_ToLengthFree(ctx, &thisIndex, JS_GetProperty(ctx, rx, JS_ATOM_lastIndex)) < 0) goto exception; nextIndex = string_advance_index(sp, thisIndex, fullUnicode); if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt64(ctx, nextIndex)) < 0) goto exception; } } nextSourcePosition = 0; for(j = 0; j < results->len; j++) { JSValueConst result; result = results->arr[j]; if (js_get_length32(ctx, &nCaptures, result) < 0) goto exception; JS_FreeValue(ctx, matched); matched = JS_ToStringFree(ctx, JS_GetPropertyInt64(ctx, result, 0)); if (JS_IsException(matched)) goto exception; if (JS_ToLengthFree(ctx, &position, JS_GetProperty(ctx, result, JS_ATOM_index))) goto exception; if (position > sp->len) position = sp->len; else if (position < 0) position = 0; /* ignore substition if going backward (can happen with custom regexp object) */ JS_FreeValue(ctx, tab); tab = 
JS_NewArray(ctx); if (JS_IsException(tab)) goto exception; if (JS_DefinePropertyValueInt64(ctx, tab, 0, JS_DupValue(ctx, matched), JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; for(n = 1; n < nCaptures; n++) { JSValue capN; capN = JS_GetPropertyInt64(ctx, result, n); if (JS_IsException(capN)) goto exception; if (!JS_IsUndefined(capN)) { capN = JS_ToStringFree(ctx, capN); if (JS_IsException(capN)) goto exception; } if (JS_DefinePropertyValueInt64(ctx, tab, n, capN, JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; } JS_FreeValue(ctx, namedCaptures); namedCaptures = JS_GetProperty(ctx, result, JS_ATOM_groups); if (JS_IsException(namedCaptures)) goto exception; if (functionalReplace) { if (JS_DefinePropertyValueInt64(ctx, tab, n++, JS_NewInt32(ctx, position), JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; if (JS_DefinePropertyValueInt64(ctx, tab, n++, JS_DupValue(ctx, str), JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; if (!JS_IsUndefined(namedCaptures)) { if (JS_DefinePropertyValueInt64(ctx, tab, n++, JS_DupValue(ctx, namedCaptures), JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; } args[0] = JS_UNDEFINED; args[1] = tab; JS_FreeValue(ctx, rep_str); rep_str = JS_ToStringFree(ctx, js_function_apply(ctx, rep, 2, args, 0)); } else { JSValue namedCaptures1; if (!JS_IsUndefined(namedCaptures)) { namedCaptures1 = JS_ToObject(ctx, namedCaptures); if (JS_IsException(namedCaptures1)) goto exception; } else { namedCaptures1 = JS_UNDEFINED; } args[0] = matched; args[1] = str; args[2] = JS_NewInt32(ctx, position); args[3] = tab; args[4] = namedCaptures1; args[5] = rep_val; JS_FreeValue(ctx, rep_str); rep_str = js_string___GetSubstitution(ctx, JS_UNDEFINED, 6, args); JS_FreeValue(ctx, namedCaptures1); } if (JS_IsException(rep_str)) goto exception; if (position >= nextSourcePosition) { string_buffer_concat(b, sp, nextSourcePosition, position); string_buffer_concat_value(b, rep_str); nextSourcePosition = position + JS_VALUE_GET_STRING(matched)->len; } } 
string_buffer_concat(b, sp, nextSourcePosition, sp->len); res = string_buffer_end(b); goto done1; exception: res = JS_EXCEPTION; done: string_buffer_free(b); done1: value_buffer_free(results); JS_FreeValue(ctx, rep_val); JS_FreeValue(ctx, matched); JS_FreeValue(ctx, tab); JS_FreeValue(ctx, rep_str); JS_FreeValue(ctx, namedCaptures); JS_FreeValue(ctx, str); return res; } static JSValue js_regexp_Symbol_search(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValueConst rx = this_val; JSValue str, previousLastIndex, currentLastIndex, result, index; if (!JS_IsObject(rx)) return JS_ThrowTypeErrorNotAnObject(ctx); result = JS_UNDEFINED; currentLastIndex = JS_UNDEFINED; previousLastIndex = JS_UNDEFINED; str = JS_ToString(ctx, argv[0]); if (JS_IsException(str)) goto exception; previousLastIndex = JS_GetProperty(ctx, rx, JS_ATOM_lastIndex); if (JS_IsException(previousLastIndex)) goto exception; if (!js_same_value(ctx, previousLastIndex, JS_NewInt32(ctx, 0))) { if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, JS_NewInt32(ctx, 0)) < 0) { goto exception; } } result = JS_RegExpExec(ctx, rx, str); if (JS_IsException(result)) goto exception; currentLastIndex = JS_GetProperty(ctx, rx, JS_ATOM_lastIndex); if (JS_IsException(currentLastIndex)) goto exception; if (js_same_value(ctx, currentLastIndex, previousLastIndex)) { JS_FreeValue(ctx, previousLastIndex); } else { if (JS_SetProperty(ctx, rx, JS_ATOM_lastIndex, previousLastIndex) < 0) { previousLastIndex = JS_UNDEFINED; goto exception; } } JS_FreeValue(ctx, str); JS_FreeValue(ctx, currentLastIndex); if (JS_IsNull(result)) { return JS_NewInt32(ctx, -1); } else { index = JS_GetProperty(ctx, result, JS_ATOM_index); JS_FreeValue(ctx, result); return index; } exception: JS_FreeValue(ctx, result); JS_FreeValue(ctx, str); JS_FreeValue(ctx, currentLastIndex); JS_FreeValue(ctx, previousLastIndex); return JS_EXCEPTION; } static JSValue js_regexp_Symbol_split(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst 
*argv) { // [Symbol.split](str, limit) JSValueConst rx = this_val; JSValueConst args[2]; JSValue str, ctor, splitter, A, flags, z, sub; JSString *strp; uint32_t lim, size, p, q; int unicodeMatching; int64_t lengthA, e, numberOfCaptures, i; if (!JS_IsObject(rx)) return JS_ThrowTypeErrorNotAnObject(ctx); ctor = JS_UNDEFINED; splitter = JS_UNDEFINED; A = JS_UNDEFINED; flags = JS_UNDEFINED; z = JS_UNDEFINED; str = JS_ToString(ctx, argv[0]); if (JS_IsException(str)) goto exception; ctor = JS_SpeciesConstructor(ctx, rx, ctx->regexp_ctor); if (JS_IsException(ctor)) goto exception; flags = JS_ToStringFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_flags)); if (JS_IsException(flags)) goto exception; strp = JS_VALUE_GET_STRING(flags); unicodeMatching = string_indexof_char(strp, 'u', 0) >= 0; if (string_indexof_char(strp, 'y', 0) < 0) { flags = JS_ConcatString3(ctx, "", flags, "y"); if (JS_IsException(flags)) goto exception; } args[0] = rx; args[1] = flags; splitter = JS_CallConstructor(ctx, ctor, 2, args); if (JS_IsException(splitter)) goto exception; A = JS_NewArray(ctx); if (JS_IsException(A)) goto exception; lengthA = 0; if (JS_IsUndefined(argv[1])) { lim = 0xffffffff; } else { if (JS_ToUint32(ctx, &lim, argv[1]) < 0) goto exception; if (lim == 0) goto done; } strp = JS_VALUE_GET_STRING(str); p = q = 0; size = strp->len; if (size == 0) { z = JS_RegExpExec(ctx, splitter, str); if (JS_IsException(z)) goto exception; if (JS_IsNull(z)) goto add_tail; goto done; } while (q < size) { if (JS_SetProperty(ctx, splitter, JS_ATOM_lastIndex, JS_NewInt32(ctx, q)) < 0) goto exception; JS_FreeValue(ctx, z); z = JS_RegExpExec(ctx, splitter, str); if (JS_IsException(z)) goto exception; if (JS_IsNull(z)) { q = string_advance_index(strp, q, unicodeMatching); } else { if (JS_ToLengthFree(ctx, &e, JS_GetProperty(ctx, splitter, JS_ATOM_lastIndex))) goto exception; if (e > size) e = size; if (e == p) { q = string_advance_index(strp, q, unicodeMatching); } else { sub = js_sub_string(ctx, strp, p, q); 
if (JS_IsException(sub)) goto exception; if (JS_DefinePropertyValueInt64(ctx, A, lengthA++, sub, JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; if (lengthA == lim) goto done; p = e; if (js_get_length64(ctx, &numberOfCaptures, z)) goto exception; for(i = 1; i < numberOfCaptures; i++) { sub = JS_ToStringFree(ctx, JS_GetPropertyInt64(ctx, z, i)); if (JS_IsException(sub)) goto exception; if (JS_DefinePropertyValueInt64(ctx, A, lengthA++, sub, JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; if (lengthA == lim) goto done; } q = p; } } } add_tail: if (p > size) p = size; sub = js_sub_string(ctx, strp, p, size); if (JS_IsException(sub)) goto exception; if (JS_DefinePropertyValueInt64(ctx, A, lengthA++, sub, JS_PROP_C_W_E | JS_PROP_THROW) < 0) goto exception; goto done; exception: JS_FreeValue(ctx, A); A = JS_EXCEPTION; done: JS_FreeValue(ctx, str); JS_FreeValue(ctx, ctor); JS_FreeValue(ctx, splitter); JS_FreeValue(ctx, flags); JS_FreeValue(ctx, z); return A; } static const JSCFunctionListEntry js_regexp_funcs[] = { JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ), //JS_CFUNC_DEF("__RegExpExec", 2, js_regexp___RegExpExec ), //JS_CFUNC_DEF("__RegExpDelete", 2, js_regexp___RegExpDelete ), }; static const JSCFunctionListEntry js_regexp_proto_funcs[] = { JS_CGETSET_DEF("flags", js_regexp_get_flags, NULL ), JS_CGETSET_DEF("source", js_regexp_get_source, NULL ), JS_CGETSET_MAGIC_DEF("global", js_regexp_get_flag, NULL, 1 ), JS_CGETSET_MAGIC_DEF("ignoreCase", js_regexp_get_flag, NULL, 2 ), JS_CGETSET_MAGIC_DEF("multiline", js_regexp_get_flag, NULL, 4 ), JS_CGETSET_MAGIC_DEF("dotAll", js_regexp_get_flag, NULL, 8 ), JS_CGETSET_MAGIC_DEF("unicode", js_regexp_get_flag, NULL, 16 ), JS_CGETSET_MAGIC_DEF("sticky", js_regexp_get_flag, NULL, 32 ), JS_CFUNC_DEF("exec", 1, js_regexp_exec ), JS_CFUNC_DEF("compile", 2, js_regexp_compile ), JS_CFUNC_DEF("test", 1, js_regexp_test ), JS_CFUNC_DEF("toString", 0, js_regexp_toString ), JS_CFUNC_DEF("[Symbol.replace]", 2, 
js_regexp_Symbol_replace ), JS_CFUNC_DEF("[Symbol.match]", 1, js_regexp_Symbol_match ), JS_CFUNC_DEF("[Symbol.matchAll]", 1, js_regexp_Symbol_matchAll ), JS_CFUNC_DEF("[Symbol.search]", 1, js_regexp_Symbol_search ), JS_CFUNC_DEF("[Symbol.split]", 2, js_regexp_Symbol_split ), //JS_CGETSET_DEF("__source", js_regexp_get___source, NULL ), //JS_CGETSET_DEF("__flags", js_regexp_get___flags, NULL ), }; static const JSCFunctionListEntry js_regexp_string_iterator_proto_funcs[] = { JS_ITERATOR_NEXT_DEF("next", 0, js_regexp_string_iterator_next, 0 ), JS_PROP_STRING_DEF("[Symbol.toStringTag]", "RegExp String Iterator", JS_PROP_CONFIGURABLE ), }; void JS_AddIntrinsicRegExpCompiler(JSContext *ctx) { ctx->compile_regexp = js_compile_regexp; } void JS_AddIntrinsicRegExp(JSContext *ctx) { JSValueConst obj; JS_AddIntrinsicRegExpCompiler(ctx); ctx->class_proto[JS_CLASS_REGEXP] = JS_NewObject(ctx); JS_SetPropertyFunctionList(ctx, ctx->class_proto[JS_CLASS_REGEXP], js_regexp_proto_funcs, countof(js_regexp_proto_funcs)); obj = JS_NewGlobalCConstructor(ctx, "RegExp", js_regexp_constructor, 2, ctx->class_proto[JS_CLASS_REGEXP]); ctx->regexp_ctor = JS_DupValue(ctx, obj); JS_SetPropertyFunctionList(ctx, obj, js_regexp_funcs, countof(js_regexp_funcs)); ctx->class_proto[JS_CLASS_REGEXP_STRING_ITERATOR] = JS_NewObjectProto(ctx, ctx->iterator_proto); JS_SetPropertyFunctionList(ctx, ctx->class_proto[JS_CLASS_REGEXP_STRING_ITERATOR], js_regexp_string_iterator_proto_funcs, countof(js_regexp_string_iterator_proto_funcs)); }