Add PCRE2 library to third_party

2025-10-25 18:50:57 +00:00 · 2023-12-10 05:59:18 -08:00 · 2023-12-10 05:59:18 -08:00 · b27e9fe845
commit b27e9fe845
parent 4f66d7f2dd
47 changed files with 58274 additions and 0 deletions
--- a/1
+++ b/1
@ -225,6 +225,7 @@ include third_party/stb/BUILD.mk		# │
 include third_party/mbedtls/BUILD.mk		# │
 include third_party/libcxx/BUILD.mk		# │
 include third_party/ggml/BUILD.mk		# │
+include third_party/pcre/BUILD.mk		# │
 include third_party/radpajama/BUILD.mk		# │
 include net/https/BUILD.mk			# │
 include third_party/regex/BUILD.mk		#─┘
--- a/third_party/BUILD.mk
+++ b/third_party/BUILD.mk
@ -24,6 +24,7 @@ o/$(MODE)/third_party:				\
 	o/$(MODE)/third_party/mbedtls		\
 	o/$(MODE)/third_party/musl		\
 	o/$(MODE)/third_party/nsync		\
+	o/$(MODE)/third_party/pcre		\
 	o/$(MODE)/third_party/puff		\
 	o/$(MODE)/third_party/python		\
 	o/$(MODE)/third_party/quickjs		\
--- a/third_party/pcre/BUILD.mk
+++ b/third_party/pcre/BUILD.mk
@ -0,0 +1,69 @@
+#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
+#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
+
+PKGS += THIRD_PARTY_PCRE
+
+THIRD_PARTY_PCRE_ARTIFACTS += THIRD_PARTY_PCRE_A
+THIRD_PARTY_PCRE = $(THIRD_PARTY_PCRE_A_DEPS) $(THIRD_PARTY_PCRE_A)
+THIRD_PARTY_PCRE_A = o/$(MODE)/third_party/pcre/pcre.a
+THIRD_PARTY_PCRE_A_FILES := $(wildcard third_party/pcre/*)
+THIRD_PARTY_PCRE_A_HDRS = $(filter %.h,$(THIRD_PARTY_PCRE_A_FILES))
+THIRD_PARTY_PCRE_A_INCS = $(filter %.inc,$(THIRD_PARTY_PCRE_A_FILES))
+THIRD_PARTY_PCRE_A_SRCS = $(filter %.c,$(THIRD_PARTY_PCRE_A_FILES))
+THIRD_PARTY_PCRE_A_OBJS = $(THIRD_PARTY_PCRE_A_SRCS:%.c=o/$(MODE)/%.o)
+
+THIRD_PARTY_PCRE_A_CHECKS =				\
+	$(THIRD_PARTY_PCRE_A).pkg			\
+	o/$(MODE)/third_party/pcre/pcre2posix_test.com.runs
+
+THIRD_PARTY_PCRE_A_DIRECTDEPS =				\
+	LIBC_CALLS					\
+	LIBC_FMT					\
+	LIBC_INTRIN					\
+	LIBC_MEM					\
+	LIBC_NEXGEN32E					\
+	LIBC_PROC					\
+	LIBC_RUNTIME					\
+	LIBC_STDIO					\
+	LIBC_STR					\
+	LIBC_SYSV
+
+THIRD_PARTY_PCRE_A_DEPS :=				\
+	$(call uniq,$(foreach x,$(THIRD_PARTY_PCRE_A_DIRECTDEPS),$($(x))))
+
+$(THIRD_PARTY_PCRE_A):					\
+		third_party/pcre/			\
+		$(THIRD_PARTY_PCRE_A).pkg		\
+		$(THIRD_PARTY_PCRE_A_OBJS)
+
+$(THIRD_PARTY_PCRE_A).pkg:				\
+		$(THIRD_PARTY_PCRE_A_OBJS)		\
+		$(foreach x,$(THIRD_PARTY_PCRE_A_DIRECTDEPS),$($(x)_A).pkg)
+
+$(THIRD_PARTY_PCRE_A_OBJS): private CPPFLAGS += -DHAVE_CONFIG_H
+
+o/$(MODE)/third_party/pcre/%.com.dbg:			\
+		$(THIRD_PARTY_PCRE)			\
+		o/$(MODE)/third_party/pcre/%.o		\
+		$(CRT)					\
+		$(APE_NO_MODIFY_SELF)
+	@$(APELINK)
+
+THIRD_PARTY_PCRE_COMS =					\
+	o/$(MODE)/third_party/pcre/pcre2grep.com	\
+	o/$(MODE)/third_party/pcre/pcre2posix_test.com
+
+THIRD_PARTY_PCRE_BINS = $(THIRD_PARTY_PCRE_COMS) $(THIRD_PARTY_PCRE_COMS:%=%.dbg)
+THIRD_PARTY_PCRE_LIBS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)))
+THIRD_PARTY_PCRE_SRCS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)_SRCS))
+THIRD_PARTY_PCRE_HDRS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)_HDRS))
+THIRD_PARTY_PCRE_INCS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)_INCS))
+THIRD_PARTY_PCRE_CHECKS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)_CHECKS))
+THIRD_PARTY_PCRE_OBJS = $(foreach x,$(THIRD_PARTY_PCRE_ARTIFACTS),$($(x)_OBJS))
+$(THIRD_PARTY_PCRE_OBJS): $(BUILD_FILES) third_party/pcre/BUILD.mk
+
+.PHONY: o/$(MODE)/third_party/pcre
+o/$(MODE)/third_party/pcre:				\
+		$(THIRD_PARTY_PCRE_A)			\
+		$(THIRD_PARTY_PCRE_BINS)		\
+		$(THIRD_PARTY_PCRE_CHECKS)
--- a/third_party/pcre/LICENCE
+++ b/third_party/pcre/LICENCE
@ -0,0 +1,94 @@
+PCRE2 LICENCE
+-------------
+
+PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
+licence, as specified below, with one exemption for certain binary
+redistributions. The documentation for PCRE2, supplied in the "doc" directory,
+is distributed under the same terms as the software itself. The data in the
+testdata directory is not copyrighted and is in the public domain.
+
+The basic library functions are written in C and are freestanding. Also
+included in the distribution is a just-in-time compiler that can be used to
+optimize pattern matching. This is an optional feature that can be omitted when
+the library is built.
+
+
+THE BASIC LIBRARY FUNCTIONS
+---------------------------
+
+Written by:       Philip Hazel
+Email local part: Philip.Hazel
+Email domain:     gmail.com
+
+Retired from University of Cambridge Computing Service,
+Cambridge, England.
+
+Copyright (c) 1997-2022 University of Cambridge
+All rights reserved.
+
+
+PCRE2 JUST-IN-TIME COMPILATION SUPPORT
+--------------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2010-2022 Zoltan Herczeg
+All rights reserved.
+
+
+STACK-LESS JUST-IN-TIME COMPILER
+--------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2009-2022 Zoltan Herczeg
+All rights reserved.
+
+
+THE "BSD" LICENCE
+-----------------
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notices,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notices, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of any
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
+------------------------------------------
+
+The second condition in the BSD licence (covering binary redistributions) does
+not apply all the way down a chain of software. If binary package A includes
+PCRE2, it must respect the condition, but if package B is software that
+includes package A, the condition is not imposed on package B unless it uses
+PCRE2 independently.
+
+End
--- a/third_party/pcre/README.cosmo
+++ b/third_party/pcre/README.cosmo
@ -0,0 +1,15 @@
+DESCRIPTION
+
+  PCRE2 is a regular expression library
+
+LICENSE
+
+  PCRE2 License
+
+ORIGIN
+
+  https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.42.tar.gz
+
+LOCAL CHANGES
+
+  Removed JIT code.
--- a/third_party/pcre/config.h
+++ b/third_party/pcre/config.h
@ -0,0 +1,446 @@
+/* src/config.h.  Generated from config.h.in by configure.  */
+/* src/config.h.in.  Generated from configure.ac by autoheader.  */
+
+
+/* PCRE2 is written in Standard C, but there are a few non-standard things it
+can cope with, allowing it to run on SunOS4 and other "close to standard"
+systems.
+
+In environments that support the GNU autotools, config.h.in is converted into
+config.h by the "configure" script. In environments that use CMake,
+config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
+hand" without using "configure" or CMake, you should copy the distributed
+config.h.generic to config.h, and edit the macro definitions to be the way you
+need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
+so that config.h is included at the start of every source.
+
+Alternatively, you can avoid editing by using -D on the compiler command line
+to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
+but if you do, default values will be taken from config.h for non-boolean
+macros that are not defined on the command line.
+
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
+
+PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
+HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
+sure both macros are undefined; an emulation function will then be used. */
+
+/* By default, the \R escape sequence matches any Unicode line ending
+   character or sequence of characters. If BSR_ANYCRLF is defined (to any
+   value), this is changed so that backslash-R matches only CR, LF, or CRLF.
+   The build-time default can be overridden by the user of PCRE2 at runtime.
+   */
+/* #undef BSR_ANYCRLF */
+
+/* Define to any value to disable the use of the z and t modifiers in
+   formatting settings such as %zu or %td (this is rarely needed). */
+/* #undef DISABLE_PERCENT_ZT */
+
+/* If you are compiling for a system that uses EBCDIC instead of ASCII
+   character codes, define this macro to any value. When EBCDIC is set, PCRE2
+   assumes that all input strings are in EBCDIC. If you do not define this
+   macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
+   is not possible to build a version of PCRE2 that supports both EBCDIC and
+   UTF-8/16/32. */
+/* #undef EBCDIC */
+
+/* In an EBCDIC environment, define this macro to any value to arrange for the
+   NL character to be 0x25 instead of the default 0x15. NL plays the role that
+   LF does in an ASCII/Unicode environment. */
+/* #undef EBCDIC_NL25 */
+
+/* Define this if your compiler supports __attribute__((uninitialized)) */
+#define HAVE_ATTRIBUTE_UNINITIALIZED 1
+
+/* Define to 1 if you have the `bcopy' function. */
+#define HAVE_BCOPY 1
+
+/* Define to 1 if you have the <bzlib.h> header file. */
+/* #undef HAVE_BZLIB_H */
+
+/* Define to 1 if you have the <dirent.h> header file. */
+#define HAVE_DIRENT_H 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <editline/readline.h> header file. */
+/* #undef HAVE_EDITLINE_READLINE_H */
+
+/* Define to 1 if you have the <edit/readline/readline.h> header file. */
+/* #undef HAVE_EDIT_READLINE_READLINE_H */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if you have the `memfd_create' function. */
+#define HAVE_MEMFD_CREATE 1
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <minix/config.h> header file. */
+/* #undef HAVE_MINIX_CONFIG_H */
+
+/* Define to 1 if you have the `mkostemp' function. */
+#define HAVE_MKOSTEMP 1
+
+/* Define if you have POSIX threads libraries and header files. */
+/* #undef HAVE_PTHREAD */
+
+/* Have PTHREAD_PRIO_INHERIT. */
+/* #undef HAVE_PTHREAD_PRIO_INHERIT */
+
+/* Define to 1 if you have the <readline.h> header file. */
+/* #undef HAVE_READLINE_H */
+
+/* Define to 1 if you have the <readline/history.h> header file. */
+/* #undef HAVE_READLINE_HISTORY_H */
+
+/* Define to 1 if you have the <readline/readline.h> header file. */
+/* #undef HAVE_READLINE_READLINE_H */
+
+/* Define to 1 if you have the `realpath' function. */
+#define HAVE_REALPATH 1
+
+/* Define to 1 if you have the `secure_getenv' function. */
+#define HAVE_SECURE_GETENV 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#define HAVE_STDIO_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strerror' function. */
+#define HAVE_STRERROR 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/wait.h> header file. */
+#define HAVE_SYS_WAIT_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if the compiler supports simple visibility declarations. */
+#define HAVE_VISIBILITY 1
+
+/* Define to 1 if you have the <wchar.h> header file. */
+#define HAVE_WCHAR_H 1
+
+/* Define to 1 if you have the <windows.h> header file. */
+/* #undef HAVE_WINDOWS_H */
+
+/* Define to 1 if you have the <zlib.h> header file. */
+/* #undef HAVE_ZLIB_H */
+
+/* This limits the amount of memory that may be used while matching a pattern.
+   It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
+   to JIT matching. The value is in kibibytes (units of 1024 bytes). */
+#define HEAP_LIMIT 20000000
+
+/* The value of LINK_SIZE determines the number of bytes used to store links
+   as offsets within the compiled regex. The default is 2, which allows for
+   compiled patterns up to 65535 code units long. This covers the vast
+   majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
+   instead. This allows for longer patterns in extreme cases. */
+#define LINK_SIZE 2
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* The value of MATCH_LIMIT determines the default number of times the
+   pcre2_match() function can record a backtrack position during a single
+   matching attempt. The value is also used to limit a loop counter in
+   pcre2_dfa_match(). There is a runtime interface for setting a different
+   limit. The limit exists in order to catch runaway regular expressions that
+   take for ever to determine that they do not match. The default is set very
+   large so that it does not accidentally catch legitimate cases. */
+#define MATCH_LIMIT 10000000
+
+/* The above limit applies to all backtracks, whether or not they are nested.
+   In some environments it is desirable to limit the nesting of backtracking
+   (that is, the depth of tree that is searched) more strictly, in order to
+   restrict the maximum amount of heap memory that is used. The value of
+   MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
+   must be less than the value of MATCH_LIMIT. The default is to use the same
+   value as MATCH_LIMIT. There is a runtime method for setting a different
+   limit. In the case of pcre2_dfa_match(), this limit controls the depth of
+   the internal nested function calls that are used for pattern recursions,
+   lookarounds, and atomic groups. */
+#define MATCH_LIMIT_DEPTH MATCH_LIMIT
+
+/* This limit is parameterized just in case anybody ever wants to change it.
+   Care must be taken if it is increased, because it guards against integer
+   overflow caused by enormously large patterns. */
+#define MAX_NAME_COUNT 10000
+
+/* This limit is parameterized just in case anybody ever wants to change it.
+   Care must be taken if it is increased, because it guards against integer
+   overflow caused by enormously large patterns. */
+#define MAX_NAME_SIZE 32
+
+/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
+/* #undef NEVER_BACKSLASH_C */
+
+/* The value of NEWLINE_DEFAULT determines the default newline character
+   sequence. PCRE2 client programs can override this by selecting other values
+   at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
+   (ANYCRLF), and 6 (NUL). */
+#define NEWLINE_DEFAULT 2
+
+/* Name of package */
+#define PACKAGE "pcre2"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "PCRE2"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "PCRE2 10.42"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "pcre2"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "10.42"
+
+/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
+   parentheses (of any kind) in a pattern. This limits the amount of system
+   stack that is used while compiling a pattern. */
+#define PARENS_NEST_LIMIT 250
+
+/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
+   pcre2grep to hold parts of the file it is searching. The buffer will be
+   expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
+   very long lines. The actual amount of memory used by pcre2grep is three
+   times this number, because it allows for the buffering of "before" and
+   "after" lines. */
+#define PCRE2GREP_BUFSIZE 20480
+
+/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
+   used by pcre2grep to hold parts of the file it is searching. The actual
+   amount of memory used by pcre2grep is three times this number, because it
+   allows for the buffering of "before" and "after" lines. */
+#define PCRE2GREP_MAX_BUFSIZE 1048576
+
+/* to make a symbol visible */
+#define PCRE2POSIX_EXP_DECL extern __attribute__ ((visibility ("default")))
+
+/* to make a symbol visible */
+#define PCRE2POSIX_EXP_DEFN extern __attribute__ ((visibility ("default")))
+
+/* Define to any value to include debugging code. */
+/* #undef PCRE2_DEBUG */
+
+/* to make a symbol visible */
+#define PCRE2_EXP_DECL extern __attribute__ ((visibility ("default")))
+
+
+/* If you are compiling for a system other than a Unix-like system or
+   Win32, and it needs some magic to be inserted before the definition
+   of a function that is exported by the library, define this macro to
+   contain the relevant magic. If you do not define this macro, a suitable
+    __declspec value is used for Windows systems; in other environments
+   "extern" is used for a C compiler and "extern C" for a C++ compiler.
+   This macro apears at the start of every exported function that is part
+   of the external API. It does not appear on functions that are "external"
+   in the C sense, but which are internal to the library. */
+#define PCRE2_EXP_DEFN __attribute__ ((visibility ("default")))
+
+/* Define to any value if linking statically (TODO: make nice with Libtool) */
+#define PCRE2_STATIC 1
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* Define to any non-zero number to enable support for SELinux compatible
+   executable memory allocator in JIT. Note that this will have no effect
+   unless SUPPORT_JIT is also defined. */
+/* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
+
+/* Define to 1 if all of the C90 standard headers exist (not just the ones
+   required in a freestanding environment). This macro is provided for
+   backward compatibility; new code need not use it. */
+#define STDC_HEADERS 1
+
+/* Define to any value to enable support for Just-In-Time compiling. */
+/* #undef SUPPORT_JIT */
+
+/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
+   is able to handle .bz2 files. */
+/* #undef SUPPORT_LIBBZ2 */
+
+/* Define to any value to allow pcre2test to be linked with libedit. */
+/* #undef SUPPORT_LIBEDIT */
+
+/* Define to any value to allow pcre2test to be linked with libreadline. */
+/* #undef SUPPORT_LIBREADLINE */
+
+/* Define to any value to allow pcre2grep to be linked with libz, so that it
+   is able to handle .gz files. */
+/* #undef SUPPORT_LIBZ */
+
+/* Define to any value to enable callout script support in pcre2grep. */
+#define SUPPORT_PCRE2GREP_CALLOUT /**/
+
+/* Define to any value to enable fork support in pcre2grep callout scripts.
+   This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
+   */
+#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/
+
+/* Define to any value to enable JIT support in pcre2grep. Note that this will
+   have no effect unless SUPPORT_JIT is also defined. */
+/* #undef SUPPORT_PCRE2GREP_JIT */
+
+/* Define to any value to enable the 16 bit PCRE2 library. */
+/* #undef SUPPORT_PCRE2_16 */
+
+/* Define to any value to enable the 32 bit PCRE2 library. */
+/* #undef SUPPORT_PCRE2_32 */
+
+/* Define to any value to enable the 8 bit PCRE2 library. */
+#define SUPPORT_PCRE2_8 /**/
+
+/* Define to any value to enable support for Unicode and UTF encoding. This
+   will work even in an EBCDIC environment, but it is incompatible with the
+   EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
+   ASCII/Unicode, but not both at once. */
+#define SUPPORT_UNICODE /**/
+
+/* Define to any value for valgrind support to find invalid memory reads. */
+/* #undef SUPPORT_VALGRIND */
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable general extensions on macOS.  */
+#ifndef _DARWIN_C_SOURCE
+# define _DARWIN_C_SOURCE 1
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable X/Open compliant socket functions that do not require linking
+   with -lxnet on HP-UX 11.11.  */
+#ifndef _HPUX_ALT_XOPEN_SOCKET_API
+# define _HPUX_ALT_XOPEN_SOCKET_API 1
+#endif
+/* Identify the host operating system as Minix.
+   This macro does not affect the system headers' behavior.
+   A future release of Autoconf may stop defining this macro.  */
+#ifndef _MINIX
+/* # undef _MINIX */
+#endif
+/* Enable general extensions on NetBSD.
+   Enable NetBSD compatibility extensions on Minix.  */
+#ifndef _NETBSD_SOURCE
+# define _NETBSD_SOURCE 1
+#endif
+/* Enable OpenBSD compatibility extensions on NetBSD.
+   Oddly enough, this does nothing on OpenBSD.  */
+#ifndef _OPENBSD_SOURCE
+# define _OPENBSD_SOURCE 1
+#endif
+/* Define to 1 if needed for POSIX-compatible behavior.  */
+#ifndef _POSIX_SOURCE
+/* # undef _POSIX_SOURCE */
+#endif
+/* Define to 2 if needed for POSIX-compatible behavior.  */
+#ifndef _POSIX_1_SOURCE
+/* # undef _POSIX_1_SOURCE */
+#endif
+/* Enable POSIX-compatible threading on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-5:2014.  */
+#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
+# define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-1:2014.  */
+#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
+# define __STDC_WANT_IEC_60559_BFP_EXT__ 1
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-2:2015.  */
+#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
+# define __STDC_WANT_IEC_60559_DFP_EXT__ 1
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-4:2015.  */
+#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
+# define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-3:2015.  */
+#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
+# define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
+#endif
+/* Enable extensions specified by ISO/IEC TR 24731-2:2010.  */
+#ifndef __STDC_WANT_LIB_EXT2__
+# define __STDC_WANT_LIB_EXT2__ 1
+#endif
+/* Enable extensions specified by ISO/IEC 24747:2009.  */
+#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
+# define __STDC_WANT_MATH_SPEC_FUNCS__ 1
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable X/Open extensions.  Define to 500 only if necessary
+   to make mbstate_t available.  */
+#ifndef _XOPEN_SOURCE
+/* # undef _XOPEN_SOURCE */
+#endif
+
+
+/* Version number of package */
+#define VERSION "10.42"
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef const */
+
+/* Define to the type of a signed integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int64_t */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
--- a/third_party/pcre/pcre2.h
+++ b/third_party/pcre/pcre2.h
@ -0,0 +1,997 @@
+/*************************************************
+*       Perl-Compatible Regular Expressions      *
+*************************************************/
+
+/* This is the public header file for the PCRE library, second API, to be
+#included by applications that call PCRE2 functions.
+
+           Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_H_IDEMPOTENT_GUARD
+#define PCRE2_H_IDEMPOTENT_GUARD
+
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+
+/* The current PCRE version information. */
+
+#define PCRE2_MAJOR           10
+#define PCRE2_MINOR           42
+#define PCRE2_PRERELEASE      
+#define PCRE2_DATE            2022-12-11
+
+/* When an application links to a PCRE DLL in Windows, the symbols that are
+imported have to be identified as such. When building PCRE2, the appropriate
+export setting is defined in pcre2_internal.h, which includes this file. So we
+don't change existing definitions of PCRE2_EXP_DECL. */
+
+#if defined(_WIN32) && !defined(PCRE2_STATIC)
+#  ifndef PCRE2_EXP_DECL
+#    define PCRE2_EXP_DECL  extern __declspec(dllimport)
+#  endif
+#endif
+
+/* By default, we use the standard "extern" declarations. */
+
+#ifndef PCRE2_EXP_DECL
+#  ifdef __cplusplus
+#    define PCRE2_EXP_DECL  extern "C"
+#  else
+#    define PCRE2_EXP_DECL  extern
+#  endif
+#endif
+
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+  void __cdecl function(....)
+
+might be needed. In order so make this easy, all the exported functions have
+PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE2_CALL_CONVENTION
+#define PCRE2_CALL_CONVENTION
+#endif
+
+/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
+uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
+not have stdint.h, which is why we use inttypes.h, which according to the C
+standard is a superset of stdint.h. If inttypes.h is not available the build
+will break and the relevant values must be provided by some other means. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/* Allow for C++ users compiling this directly. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
+or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
+is passed. Put these bits at the most significant end of the options word so
+others can be added next to them */
+
+#define PCRE2_ANCHORED            0x80000000u
+#define PCRE2_NO_UTF_CHECK        0x40000000u
+#define PCRE2_ENDANCHORED         0x20000000u
+
+/* The following option bits can be passed only to pcre2_compile(). However,
+they may affect compilation, JIT compilation, and/or interpretive execution.
+The following tags indicate which:
+
+C   alters what is compiled by pcre2_compile()
+J   alters what is compiled by pcre2_jit_compile()
+M   is inspected during pcre2_match() execution
+D   is inspected during pcre2_dfa_match() execution
+*/
+
+#define PCRE2_ALLOW_EMPTY_CLASS   0x00000001u  /* C       */
+#define PCRE2_ALT_BSUX            0x00000002u  /* C       */
+#define PCRE2_AUTO_CALLOUT        0x00000004u  /* C       */
+#define PCRE2_CASELESS            0x00000008u  /* C       */
+#define PCRE2_DOLLAR_ENDONLY      0x00000010u  /*   J M D */
+#define PCRE2_DOTALL              0x00000020u  /* C       */
+#define PCRE2_DUPNAMES            0x00000040u  /* C       */
+#define PCRE2_EXTENDED            0x00000080u  /* C       */
+#define PCRE2_FIRSTLINE           0x00000100u  /*   J M D */
+#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u  /* C J M   */
+#define PCRE2_MULTILINE           0x00000400u  /* C       */
+#define PCRE2_NEVER_UCP           0x00000800u  /* C       */
+#define PCRE2_NEVER_UTF           0x00001000u  /* C       */
+#define PCRE2_NO_AUTO_CAPTURE     0x00002000u  /* C       */
+#define PCRE2_NO_AUTO_POSSESS     0x00004000u  /* C       */
+#define PCRE2_NO_DOTSTAR_ANCHOR   0x00008000u  /* C       */
+#define PCRE2_NO_START_OPTIMIZE   0x00010000u  /*   J M D */
+#define PCRE2_UCP                 0x00020000u  /* C J M D */
+#define PCRE2_UNGREEDY            0x00040000u  /* C       */
+#define PCRE2_UTF                 0x00080000u  /* C J M D */
+#define PCRE2_NEVER_BACKSLASH_C   0x00100000u  /* C       */
+#define PCRE2_ALT_CIRCUMFLEX      0x00200000u  /*   J M D */
+#define PCRE2_ALT_VERBNAMES       0x00400000u  /* C       */
+#define PCRE2_USE_OFFSET_LIMIT    0x00800000u  /*   J M D */
+#define PCRE2_EXTENDED_MORE       0x01000000u  /* C       */
+#define PCRE2_LITERAL             0x02000000u  /* C       */
+#define PCRE2_MATCH_INVALID_UTF   0x04000000u  /*   J M D */
+
+/* An additional compile options word is available in the compile context. */
+
+#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  0x00000001u  /* C */
+#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    0x00000002u  /* C */
+#define PCRE2_EXTRA_MATCH_WORD               0x00000004u  /* C */
+#define PCRE2_EXTRA_MATCH_LINE               0x00000008u  /* C */
+#define PCRE2_EXTRA_ESCAPED_CR_IS_LF         0x00000010u  /* C */
+#define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
+#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     0x00000040u  /* C */
+
+/* These are for pcre2_jit_compile(). */
+
+#define PCRE2_JIT_COMPLETE        0x00000001u  /* For full matching */
+#define PCRE2_JIT_PARTIAL_SOFT    0x00000002u
+#define PCRE2_JIT_PARTIAL_HARD    0x00000004u
+#define PCRE2_JIT_INVALID_UTF     0x00000100u
+
+/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
+pcre2_substitute(). Some are allowed only for one of the functions, and in
+these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
+PCRE2_NO_UTF_CHECK can also be passed to these functions (though
+pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
+
+#define PCRE2_NOTBOL                      0x00000001u
+#define PCRE2_NOTEOL                      0x00000002u
+#define PCRE2_NOTEMPTY                    0x00000004u  /* ) These two must be kept */
+#define PCRE2_NOTEMPTY_ATSTART            0x00000008u  /* ) adjacent to each other. */
+#define PCRE2_PARTIAL_SOFT                0x00000010u
+#define PCRE2_PARTIAL_HARD                0x00000020u
+#define PCRE2_DFA_RESTART                 0x00000040u  /* pcre2_dfa_match() only */
+#define PCRE2_DFA_SHORTEST                0x00000080u  /* pcre2_dfa_match() only */
+#define PCRE2_SUBSTITUTE_GLOBAL           0x00000100u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_EXTENDED         0x00000200u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_UNSET_EMPTY      0x00000400u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET    0x00000800u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  0x00001000u  /* pcre2_substitute() only */
+#define PCRE2_NO_JIT                      0x00002000u  /* Not for pcre2_dfa_match() */
+#define PCRE2_COPY_MATCHED_SUBJECT        0x00004000u
+#define PCRE2_SUBSTITUTE_LITERAL          0x00008000u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_MATCHED          0x00010000u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u  /* pcre2_substitute() only */
+
+/* Options for pcre2_pattern_convert(). */
+
+#define PCRE2_CONVERT_UTF                    0x00000001u
+#define PCRE2_CONVERT_NO_UTF_CHECK           0x00000002u
+#define PCRE2_CONVERT_POSIX_BASIC            0x00000004u
+#define PCRE2_CONVERT_POSIX_EXTENDED         0x00000008u
+#define PCRE2_CONVERT_GLOB                   0x00000010u
+#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
+#define PCRE2_CONVERT_GLOB_NO_STARSTAR       0x00000050u
+
+/* Newline and \R settings, for use in compile contexts. The newline values
+must be kept in step with values set in config.h and both sets must all be
+greater than zero. */
+
+#define PCRE2_NEWLINE_CR          1
+#define PCRE2_NEWLINE_LF          2
+#define PCRE2_NEWLINE_CRLF        3
+#define PCRE2_NEWLINE_ANY         4
+#define PCRE2_NEWLINE_ANYCRLF     5
+#define PCRE2_NEWLINE_NUL         6
+
+#define PCRE2_BSR_UNICODE         1
+#define PCRE2_BSR_ANYCRLF         2
+
+/* Error codes for pcre2_compile(). Some of these are also used by
+pcre2_pattern_convert(). */
+
+#define PCRE2_ERROR_END_BACKSLASH                  101
+#define PCRE2_ERROR_END_BACKSLASH_C                102
+#define PCRE2_ERROR_UNKNOWN_ESCAPE                 103
+#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER        104
+#define PCRE2_ERROR_QUANTIFIER_TOO_BIG             105
+#define PCRE2_ERROR_MISSING_SQUARE_BRACKET         106
+#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS        107
+#define PCRE2_ERROR_CLASS_RANGE_ORDER              108
+#define PCRE2_ERROR_QUANTIFIER_INVALID             109
+#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT     110
+#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY     111
+#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS       112
+#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING     113
+#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS    114
+#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE       115
+#define PCRE2_ERROR_NULL_PATTERN                   116
+#define PCRE2_ERROR_BAD_OPTIONS                    117
+#define PCRE2_ERROR_MISSING_COMMENT_CLOSING        118
+#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP      119
+#define PCRE2_ERROR_PATTERN_TOO_LARGE              120
+#define PCRE2_ERROR_HEAP_FAILED                    121
+#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS  122
+#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW         123
+#define PCRE2_ERROR_MISSING_CONDITION_CLOSING      124
+#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH    125
+#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE        126
+#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES    127
+#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED   128
+#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE         129
+#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS            130
+#define PCRE2_ERROR_INTERNAL_STUDY_ERROR           131
+#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED          132
+#define PCRE2_ERROR_PARENTHESES_STACK_CHECK        133
+#define PCRE2_ERROR_CODE_POINT_TOO_BIG             134
+#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED     135
+#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
+#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE    137
+#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG         138
+#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING        139
+#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB         140
+#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P     141
+#define PCRE2_ERROR_MISSING_NAME_TERMINATOR        142
+#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME      143
+#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME        144
+#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
+#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY     146
+#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY       147
+#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG       148
+#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS     149
+#define PCRE2_ERROR_CLASS_INVALID_RANGE            150
+#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG             151
+#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE     152
+#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN    153
+#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES       154
+#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE      155
+#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE       156
+#define PCRE2_ERROR_BACKSLASH_G_SYNTAX             157
+#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
+/* Error 159 is obsolete and should now never occur */
+#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED      159
+#define PCRE2_ERROR_VERB_UNKNOWN                   160
+#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG      161
+#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED       162
+#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW       163
+#define PCRE2_ERROR_INVALID_OCTAL                  164
+#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH      165
+#define PCRE2_ERROR_MARK_MISSING_ARGUMENT          166
+#define PCRE2_ERROR_INVALID_HEXADECIMAL            167
+#define PCRE2_ERROR_BACKSLASH_C_SYNTAX             168
+#define PCRE2_ERROR_BACKSLASH_K_SYNTAX             169
+#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS  170
+#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS           171
+#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG        172
+#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT  173
+#define PCRE2_ERROR_UTF_IS_DISABLED                174
+#define PCRE2_ERROR_UCP_IS_DISABLED                175
+#define PCRE2_ERROR_VERB_NAME_TOO_LONG             176
+#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
+#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS    178
+#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX       179
+#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
+#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER    181
+#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER   182
+#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED    183
+#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP      184
+#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED   185
+#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED        186
+#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG            187
+#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG        188
+#define PCRE2_ERROR_INTERNAL_BAD_CODE              189
+#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP      190
+#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16         191
+#define PCRE2_ERROR_BAD_LITERAL_OPTIONS            192
+#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE      193
+#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS      194
+#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN        195
+#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       196
+#define PCRE2_ERROR_TOO_MANY_CAPTURES              197
+#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED  198
+#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND      199
+
+
+/* "Expected" matching error codes: no match and partial match. */
+
+#define PCRE2_ERROR_NOMATCH          (-1)
+#define PCRE2_ERROR_PARTIAL          (-2)
+
+/* Error codes for UTF-8 validity checks */
+
+#define PCRE2_ERROR_UTF8_ERR1        (-3)
+#define PCRE2_ERROR_UTF8_ERR2        (-4)
+#define PCRE2_ERROR_UTF8_ERR3        (-5)
+#define PCRE2_ERROR_UTF8_ERR4        (-6)
+#define PCRE2_ERROR_UTF8_ERR5        (-7)
+#define PCRE2_ERROR_UTF8_ERR6        (-8)
+#define PCRE2_ERROR_UTF8_ERR7        (-9)
+#define PCRE2_ERROR_UTF8_ERR8       (-10)
+#define PCRE2_ERROR_UTF8_ERR9       (-11)
+#define PCRE2_ERROR_UTF8_ERR10      (-12)
+#define PCRE2_ERROR_UTF8_ERR11      (-13)
+#define PCRE2_ERROR_UTF8_ERR12      (-14)
+#define PCRE2_ERROR_UTF8_ERR13      (-15)
+#define PCRE2_ERROR_UTF8_ERR14      (-16)
+#define PCRE2_ERROR_UTF8_ERR15      (-17)
+#define PCRE2_ERROR_UTF8_ERR16      (-18)
+#define PCRE2_ERROR_UTF8_ERR17      (-19)
+#define PCRE2_ERROR_UTF8_ERR18      (-20)
+#define PCRE2_ERROR_UTF8_ERR19      (-21)
+#define PCRE2_ERROR_UTF8_ERR20      (-22)
+#define PCRE2_ERROR_UTF8_ERR21      (-23)
+
+/* Error codes for UTF-16 validity checks */
+
+#define PCRE2_ERROR_UTF16_ERR1      (-24)
+#define PCRE2_ERROR_UTF16_ERR2      (-25)
+#define PCRE2_ERROR_UTF16_ERR3      (-26)
+
+/* Error codes for UTF-32 validity checks */
+
+#define PCRE2_ERROR_UTF32_ERR1      (-27)
+#define PCRE2_ERROR_UTF32_ERR2      (-28)
+
+/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
+functions, context functions, and serializing functions. They are in numerical
+order. Originally they were in alphabetical order too, but now that PCRE2 is
+released, the numbers must not be changed. */
+
+#define PCRE2_ERROR_BADDATA           (-29)
+#define PCRE2_ERROR_MIXEDTABLES       (-30)  /* Name was changed */
+#define PCRE2_ERROR_BADMAGIC          (-31)
+#define PCRE2_ERROR_BADMODE           (-32)
+#define PCRE2_ERROR_BADOFFSET         (-33)
+#define PCRE2_ERROR_BADOPTION         (-34)
+#define PCRE2_ERROR_BADREPLACEMENT    (-35)
+#define PCRE2_ERROR_BADUTFOFFSET      (-36)
+#define PCRE2_ERROR_CALLOUT           (-37)  /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART    (-38)
+#define PCRE2_ERROR_DFA_RECURSE       (-39)
+#define PCRE2_ERROR_DFA_UCOND         (-40)
+#define PCRE2_ERROR_DFA_UFUNC         (-41)
+#define PCRE2_ERROR_DFA_UITEM         (-42)
+#define PCRE2_ERROR_DFA_WSSIZE        (-43)
+#define PCRE2_ERROR_INTERNAL          (-44)
+#define PCRE2_ERROR_JIT_BADOPTION     (-45)
+#define PCRE2_ERROR_JIT_STACKLIMIT    (-46)
+#define PCRE2_ERROR_MATCHLIMIT        (-47)
+#define PCRE2_ERROR_NOMEMORY          (-48)
+#define PCRE2_ERROR_NOSUBSTRING       (-49)
+#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
+#define PCRE2_ERROR_NULL              (-51)
+#define PCRE2_ERROR_RECURSELOOP       (-52)
+#define PCRE2_ERROR_DEPTHLIMIT        (-53)
+#define PCRE2_ERROR_RECURSIONLIMIT    (-53)  /* Obsolete synonym */
+#define PCRE2_ERROR_UNAVAILABLE       (-54)
+#define PCRE2_ERROR_UNSET             (-55)
+#define PCRE2_ERROR_BADOFFSETLIMIT    (-56)
+#define PCRE2_ERROR_BADREPESCAPE      (-57)
+#define PCRE2_ERROR_REPMISSINGBRACE   (-58)
+#define PCRE2_ERROR_BADSUBSTITUTION   (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN    (-60)
+#define PCRE2_ERROR_TOOMANYREPLACE    (-61)
+#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
+#define PCRE2_ERROR_HEAPLIMIT         (-63)
+#define PCRE2_ERROR_CONVERT_SYNTAX    (-64)
+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
+#define PCRE2_ERROR_DFA_UINVALID_UTF  (-66)
+
+
+/* Request types for pcre2_pattern_info() */
+
+#define PCRE2_INFO_ALLOPTIONS            0
+#define PCRE2_INFO_ARGOPTIONS            1
+#define PCRE2_INFO_BACKREFMAX            2
+#define PCRE2_INFO_BSR                   3
+#define PCRE2_INFO_CAPTURECOUNT          4
+#define PCRE2_INFO_FIRSTCODEUNIT         5
+#define PCRE2_INFO_FIRSTCODETYPE         6
+#define PCRE2_INFO_FIRSTBITMAP           7
+#define PCRE2_INFO_HASCRORLF             8
+#define PCRE2_INFO_JCHANGED              9
+#define PCRE2_INFO_JITSIZE              10
+#define PCRE2_INFO_LASTCODEUNIT         11
+#define PCRE2_INFO_LASTCODETYPE         12
+#define PCRE2_INFO_MATCHEMPTY           13
+#define PCRE2_INFO_MATCHLIMIT           14
+#define PCRE2_INFO_MAXLOOKBEHIND        15
+#define PCRE2_INFO_MINLENGTH            16
+#define PCRE2_INFO_NAMECOUNT            17
+#define PCRE2_INFO_NAMEENTRYSIZE        18
+#define PCRE2_INFO_NAMETABLE            19
+#define PCRE2_INFO_NEWLINE              20
+#define PCRE2_INFO_DEPTHLIMIT           21
+#define PCRE2_INFO_RECURSIONLIMIT       21  /* Obsolete synonym */
+#define PCRE2_INFO_SIZE                 22
+#define PCRE2_INFO_HASBACKSLASHC        23
+#define PCRE2_INFO_FRAMESIZE            24
+#define PCRE2_INFO_HEAPLIMIT            25
+#define PCRE2_INFO_EXTRAOPTIONS         26
+
+/* Request types for pcre2_config(). */
+
+#define PCRE2_CONFIG_BSR                     0
+#define PCRE2_CONFIG_JIT                     1
+#define PCRE2_CONFIG_JITTARGET               2
+#define PCRE2_CONFIG_LINKSIZE                3
+#define PCRE2_CONFIG_MATCHLIMIT              4
+#define PCRE2_CONFIG_NEWLINE                 5
+#define PCRE2_CONFIG_PARENSLIMIT             6
+#define PCRE2_CONFIG_DEPTHLIMIT              7
+#define PCRE2_CONFIG_RECURSIONLIMIT          7  /* Obsolete synonym */
+#define PCRE2_CONFIG_STACKRECURSE            8  /* Obsolete */
+#define PCRE2_CONFIG_UNICODE                 9
+#define PCRE2_CONFIG_UNICODE_VERSION        10
+#define PCRE2_CONFIG_VERSION                11
+#define PCRE2_CONFIG_HEAPLIMIT              12
+#define PCRE2_CONFIG_NEVER_BACKSLASH_C      13
+#define PCRE2_CONFIG_COMPILED_WIDTHS        14
+#define PCRE2_CONFIG_TABLES_LENGTH          15
+
+
+/* Types for code units in patterns and subject strings. */
+
+typedef uint8_t  PCRE2_UCHAR8;
+typedef uint16_t PCRE2_UCHAR16;
+typedef uint32_t PCRE2_UCHAR32;
+
+typedef const PCRE2_UCHAR8  *PCRE2_SPTR8;
+typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
+typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
+
+/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
+including pattern offsets for errors and subject offsets after a match. We
+define special values to indicate zero-terminated strings and unset offsets in
+the offset vector (ovector). */
+
+#define PCRE2_SIZE            size_t
+#define PCRE2_SIZE_MAX        SIZE_MAX
+#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
+#define PCRE2_UNSET           (~(PCRE2_SIZE)0)
+
+/* Generic types for opaque structures and JIT callback functions. These
+declarations are defined in a macro that is expanded for each width later. */
+
+#define PCRE2_TYPES_LIST \
+struct pcre2_real_general_context; \
+typedef struct pcre2_real_general_context pcre2_general_context; \
+\
+struct pcre2_real_compile_context; \
+typedef struct pcre2_real_compile_context pcre2_compile_context; \
+\
+struct pcre2_real_match_context; \
+typedef struct pcre2_real_match_context pcre2_match_context; \
+\
+struct pcre2_real_convert_context; \
+typedef struct pcre2_real_convert_context pcre2_convert_context; \
+\
+struct pcre2_real_code; \
+typedef struct pcre2_real_code pcre2_code; \
+\
+struct pcre2_real_match_data; \
+typedef struct pcre2_real_match_data pcre2_match_data; \
+\
+struct pcre2_real_jit_stack; \
+typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
+\
+typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
+
+
+/* The structures for passing out data via callout functions. We use structures
+so that new fields can be added on the end in future versions, without changing
+the API of the function, thereby allowing old clients to work without
+modification. Define the generic versions in a macro; the width-specific
+versions are generated from this macro below. */
+
+/* Flags for the callout_flags field. These are cleared after a callout. */
+
+#define PCRE2_CALLOUT_STARTMATCH    0x00000001u  /* Set for each bumpalong */
+#define PCRE2_CALLOUT_BACKTRACK     0x00000002u  /* Set after a backtrack */
+
+#define PCRE2_STRUCTURE_LIST \
+typedef struct pcre2_callout_block { \
+  uint32_t      version;           /* Identifies version of block */ \
+  /* ------------------------ Version 0 ------------------------------- */ \
+  uint32_t      callout_number;    /* Number compiled into pattern */ \
+  uint32_t      capture_top;       /* Max current capture */ \
+  uint32_t      capture_last;      /* Most recently closed capture */ \
+  PCRE2_SIZE   *offset_vector;     /* The offset vector */ \
+  PCRE2_SPTR    mark;              /* Pointer to current mark or NULL */ \
+  PCRE2_SPTR    subject;           /* The subject being matched */ \
+  PCRE2_SIZE    subject_length;    /* The length of the subject */ \
+  PCRE2_SIZE    start_match;       /* Offset to start of this match attempt */ \
+  PCRE2_SIZE    current_position;  /* Where we currently are in the subject */ \
+  PCRE2_SIZE    pattern_position;  /* Offset to next item in the pattern */ \
+  PCRE2_SIZE    next_item_length;  /* Length of next item in the pattern */ \
+  /* ------------------- Added for Version 1 -------------------------- */ \
+  PCRE2_SIZE    callout_string_offset; /* Offset to string within pattern */ \
+  PCRE2_SIZE    callout_string_length; /* Length of string compiled into pattern */ \
+  PCRE2_SPTR    callout_string;    /* String compiled into pattern */ \
+  /* ------------------- Added for Version 2 -------------------------- */ \
+  uint32_t      callout_flags;     /* See above for list */ \
+  /* ------------------------------------------------------------------ */ \
+} pcre2_callout_block; \
+\
+typedef struct pcre2_callout_enumerate_block { \
+  uint32_t      version;           /* Identifies version of block */ \
+  /* ------------------------ Version 0 ------------------------------- */ \
+  PCRE2_SIZE    pattern_position;  /* Offset to next item in the pattern */ \
+  PCRE2_SIZE    next_item_length;  /* Length of next item in the pattern */ \
+  uint32_t      callout_number;    /* Number compiled into pattern */ \
+  PCRE2_SIZE    callout_string_offset; /* Offset to string within pattern */ \
+  PCRE2_SIZE    callout_string_length; /* Length of string compiled into pattern */ \
+  PCRE2_SPTR    callout_string;    /* String compiled into pattern */ \
+  /* ------------------------------------------------------------------ */ \
+} pcre2_callout_enumerate_block; \
+\
+typedef struct pcre2_substitute_callout_block { \
+  uint32_t      version;           /* Identifies version of block */ \
+  /* ------------------------ Version 0 ------------------------------- */ \
+  PCRE2_SPTR    input;             /* Pointer to input subject string */ \
+  PCRE2_SPTR    output;            /* Pointer to output buffer */ \
+  PCRE2_SIZE    output_offsets[2]; /* Changed portion of the output */ \
+  PCRE2_SIZE   *ovector;           /* Pointer to current ovector */ \
+  uint32_t      oveccount;         /* Count of pairs set in ovector */ \
+  uint32_t      subscount;         /* Substitution number */ \
+  /* ------------------------------------------------------------------ */ \
+} pcre2_substitute_callout_block;
+
+
+/* List the generic forms of all other functions in macros, which will be
+expanded for each width below. Start with functions that give general
+information. */
+
+#define PCRE2_GENERAL_INFO_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
+
+
+/* Functions for manipulating contexts. */
+
+#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
+  pcre2_general_context_copy(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
+  pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
+    void (*)(void *, void *), void *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_general_context_free(pcre2_general_context *);
+
+#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
+  pcre2_compile_context_copy(pcre2_compile_context *); \
+PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
+  pcre2_compile_context_create(pcre2_general_context *);\
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_compile_context_free(pcre2_compile_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_newline(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
+    int (*)(uint32_t, void *), void *);
+
+#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
+  pcre2_match_context_copy(pcre2_match_context *); \
+PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
+  pcre2_match_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_match_context_free(pcre2_match_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_callout(pcre2_match_context *, \
+    int (*)(pcre2_callout_block *, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_substitute_callout(pcre2_match_context *, \
+    int (*)(pcre2_substitute_callout_block *, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_recursion_memory_management(pcre2_match_context *, \
+    void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
+
+#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
+  pcre2_convert_context_copy(pcre2_convert_context *); \
+PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
+  pcre2_convert_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_convert_context_free(pcre2_convert_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
+
+
+/* Functions concerned with compiling a pattern to PCRE internal code. */
+
+#define PCRE2_COMPILE_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
+  pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
+    pcre2_compile_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_code_free(pcre2_code *); \
+PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
+  pcre2_code_copy(const pcre2_code *); \
+PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
+  pcre2_code_copy_with_tables(const pcre2_code *);
+
+
+/* Functions that give information about a compiled pattern. */
+
+#define PCRE2_PATTERN_INFO_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_callout_enumerate(const pcre2_code *, \
+    int (*)(pcre2_callout_enumerate_block *, void *), void *);
+
+
+/* Functions for running a match and inspecting the result. */
+
+#define PCRE2_MATCH_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
+  pcre2_match_data_create(uint32_t, pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
+  pcre2_match_data_create_from_pattern(const pcre2_code *, \
+    pcre2_general_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+    uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+    uint32_t, pcre2_match_data *, pcre2_match_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_match_data_free(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
+  pcre2_get_mark(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+  pcre2_get_match_data_size(pcre2_match_data *); \
+PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
+  pcre2_get_ovector_count(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
+  pcre2_get_ovector_pointer(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+  pcre2_get_startchar(pcre2_match_data *);
+
+
+/* Convenience functions for handling matched substrings. */
+
+#define PCRE2_SUBSTRING_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
+    PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
+    PCRE2_SIZE *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_substring_free(PCRE2_UCHAR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
+    PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
+    PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
+    PCRE2_SPTR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_substring_list_free(PCRE2_SPTR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
+
+/* Functions for serializing / deserializing compiled patterns. */
+
+#define PCRE2_SERIALIZE_FUNCTIONS \
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+  pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
+    PCRE2_SIZE *, pcre2_general_context *); \
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+  pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
+    pcre2_general_context *); \
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+  pcre2_serialize_get_number_of_codes(const uint8_t *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_serialize_free(uint8_t *);
+
+
+/* Convenience function for match + substitute. */
+
+#define PCRE2_SUBSTITUTE_FUNCTION \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+    uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
+    PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
+
+
+/* Functions for converting pattern source strings. */
+
+#define PCRE2_CONVERT_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
+    PCRE2_SIZE *, pcre2_convert_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_converted_pattern_free(PCRE2_UCHAR *);
+
+
+/* Functions for JIT processing */
+
+#define PCRE2_JIT_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_jit_compile(pcre2_code *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+    uint32_t, pcre2_match_data *, pcre2_match_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_jit_free_unused_memory(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
+  pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_jit_stack_free(pcre2_jit_stack *);
+
+
+/* Other miscellaneous functions. */
+
+#define PCRE2_OTHER_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
+PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
+  pcre2_maketables(pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
+
+/* Define macros that generate width-specific names from generic versions. The
+three-level macro scheme is necessary to get the macros expanded when we want
+them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
+generating three versions of everything below. After that, PCRE2_SUFFIX will be
+re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
+pcre2_compile are called by application code. */
+
+#define PCRE2_JOIN(a,b) a ## b
+#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
+#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
+
+
+/* Data types */
+
+#define PCRE2_UCHAR                 PCRE2_SUFFIX(PCRE2_UCHAR)
+#define PCRE2_SPTR                  PCRE2_SUFFIX(PCRE2_SPTR)
+
+#define pcre2_code                  PCRE2_SUFFIX(pcre2_code_)
+#define pcre2_jit_callback          PCRE2_SUFFIX(pcre2_jit_callback_)
+#define pcre2_jit_stack             PCRE2_SUFFIX(pcre2_jit_stack_)
+
+#define pcre2_real_code             PCRE2_SUFFIX(pcre2_real_code_)
+#define pcre2_real_general_context  PCRE2_SUFFIX(pcre2_real_general_context_)
+#define pcre2_real_compile_context  PCRE2_SUFFIX(pcre2_real_compile_context_)
+#define pcre2_real_convert_context  PCRE2_SUFFIX(pcre2_real_convert_context_)
+#define pcre2_real_match_context    PCRE2_SUFFIX(pcre2_real_match_context_)
+#define pcre2_real_jit_stack        PCRE2_SUFFIX(pcre2_real_jit_stack_)
+#define pcre2_real_match_data       PCRE2_SUFFIX(pcre2_real_match_data_)
+
+
+/* Data blocks */
+
+#define pcre2_callout_block            PCRE2_SUFFIX(pcre2_callout_block_)
+#define pcre2_callout_enumerate_block  PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
+#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
+#define pcre2_general_context          PCRE2_SUFFIX(pcre2_general_context_)
+#define pcre2_compile_context          PCRE2_SUFFIX(pcre2_compile_context_)
+#define pcre2_convert_context          PCRE2_SUFFIX(pcre2_convert_context_)
+#define pcre2_match_context            PCRE2_SUFFIX(pcre2_match_context_)
+#define pcre2_match_data               PCRE2_SUFFIX(pcre2_match_data_)
+
+
+/* Functions: the complete list in alphabetical order */
+
+#define pcre2_callout_enumerate               PCRE2_SUFFIX(pcre2_callout_enumerate_)
+#define pcre2_code_copy                       PCRE2_SUFFIX(pcre2_code_copy_)
+#define pcre2_code_copy_with_tables           PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
+#define pcre2_code_free                       PCRE2_SUFFIX(pcre2_code_free_)
+#define pcre2_compile                         PCRE2_SUFFIX(pcre2_compile_)
+#define pcre2_compile_context_copy            PCRE2_SUFFIX(pcre2_compile_context_copy_)
+#define pcre2_compile_context_create          PCRE2_SUFFIX(pcre2_compile_context_create_)
+#define pcre2_compile_context_free            PCRE2_SUFFIX(pcre2_compile_context_free_)
+#define pcre2_config                          PCRE2_SUFFIX(pcre2_config_)
+#define pcre2_convert_context_copy            PCRE2_SUFFIX(pcre2_convert_context_copy_)
+#define pcre2_convert_context_create          PCRE2_SUFFIX(pcre2_convert_context_create_)
+#define pcre2_convert_context_free            PCRE2_SUFFIX(pcre2_convert_context_free_)
+#define pcre2_converted_pattern_free          PCRE2_SUFFIX(pcre2_converted_pattern_free_)
+#define pcre2_dfa_match                       PCRE2_SUFFIX(pcre2_dfa_match_)
+#define pcre2_general_context_copy            PCRE2_SUFFIX(pcre2_general_context_copy_)
+#define pcre2_general_context_create          PCRE2_SUFFIX(pcre2_general_context_create_)
+#define pcre2_general_context_free            PCRE2_SUFFIX(pcre2_general_context_free_)
+#define pcre2_get_error_message               PCRE2_SUFFIX(pcre2_get_error_message_)
+#define pcre2_get_mark                        PCRE2_SUFFIX(pcre2_get_mark_)
+#define pcre2_get_match_data_size             PCRE2_SUFFIX(pcre2_get_match_data_size_)
+#define pcre2_get_ovector_pointer             PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
+#define pcre2_get_ovector_count               PCRE2_SUFFIX(pcre2_get_ovector_count_)
+#define pcre2_get_startchar                   PCRE2_SUFFIX(pcre2_get_startchar_)
+#define pcre2_jit_compile                     PCRE2_SUFFIX(pcre2_jit_compile_)
+#define pcre2_jit_match                       PCRE2_SUFFIX(pcre2_jit_match_)
+#define pcre2_jit_free_unused_memory          PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
+#define pcre2_jit_stack_assign                PCRE2_SUFFIX(pcre2_jit_stack_assign_)
+#define pcre2_jit_stack_create                PCRE2_SUFFIX(pcre2_jit_stack_create_)
+#define pcre2_jit_stack_free                  PCRE2_SUFFIX(pcre2_jit_stack_free_)
+#define pcre2_maketables                      PCRE2_SUFFIX(pcre2_maketables_)
+#define pcre2_maketables_free                 PCRE2_SUFFIX(pcre2_maketables_free_)
+#define pcre2_match                           PCRE2_SUFFIX(pcre2_match_)
+#define pcre2_match_context_copy              PCRE2_SUFFIX(pcre2_match_context_copy_)
+#define pcre2_match_context_create            PCRE2_SUFFIX(pcre2_match_context_create_)
+#define pcre2_match_context_free              PCRE2_SUFFIX(pcre2_match_context_free_)
+#define pcre2_match_data_create               PCRE2_SUFFIX(pcre2_match_data_create_)
+#define pcre2_match_data_create_from_pattern  PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
+#define pcre2_match_data_free                 PCRE2_SUFFIX(pcre2_match_data_free_)
+#define pcre2_pattern_convert                 PCRE2_SUFFIX(pcre2_pattern_convert_)
+#define pcre2_pattern_info                    PCRE2_SUFFIX(pcre2_pattern_info_)
+#define pcre2_serialize_decode                PCRE2_SUFFIX(pcre2_serialize_decode_)
+#define pcre2_serialize_encode                PCRE2_SUFFIX(pcre2_serialize_encode_)
+#define pcre2_serialize_free                  PCRE2_SUFFIX(pcre2_serialize_free_)
+#define pcre2_serialize_get_number_of_codes   PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
+#define pcre2_set_bsr                         PCRE2_SUFFIX(pcre2_set_bsr_)
+#define pcre2_set_callout                     PCRE2_SUFFIX(pcre2_set_callout_)
+#define pcre2_set_character_tables            PCRE2_SUFFIX(pcre2_set_character_tables_)
+#define pcre2_set_compile_extra_options       PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
+#define pcre2_set_compile_recursion_guard     PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
+#define pcre2_set_depth_limit                 PCRE2_SUFFIX(pcre2_set_depth_limit_)
+#define pcre2_set_glob_escape                 PCRE2_SUFFIX(pcre2_set_glob_escape_)
+#define pcre2_set_glob_separator              PCRE2_SUFFIX(pcre2_set_glob_separator_)
+#define pcre2_set_heap_limit                  PCRE2_SUFFIX(pcre2_set_heap_limit_)
+#define pcre2_set_match_limit                 PCRE2_SUFFIX(pcre2_set_match_limit_)
+#define pcre2_set_max_pattern_length          PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
+#define pcre2_set_newline                     PCRE2_SUFFIX(pcre2_set_newline_)
+#define pcre2_set_parens_nest_limit           PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
+#define pcre2_set_offset_limit                PCRE2_SUFFIX(pcre2_set_offset_limit_)
+#define pcre2_set_substitute_callout          PCRE2_SUFFIX(pcre2_set_substitute_callout_)
+#define pcre2_substitute                      PCRE2_SUFFIX(pcre2_substitute_)
+#define pcre2_substring_copy_byname           PCRE2_SUFFIX(pcre2_substring_copy_byname_)
+#define pcre2_substring_copy_bynumber         PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
+#define pcre2_substring_free                  PCRE2_SUFFIX(pcre2_substring_free_)
+#define pcre2_substring_get_byname            PCRE2_SUFFIX(pcre2_substring_get_byname_)
+#define pcre2_substring_get_bynumber          PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
+#define pcre2_substring_length_byname         PCRE2_SUFFIX(pcre2_substring_length_byname_)
+#define pcre2_substring_length_bynumber       PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
+#define pcre2_substring_list_get              PCRE2_SUFFIX(pcre2_substring_list_get_)
+#define pcre2_substring_list_free             PCRE2_SUFFIX(pcre2_substring_list_free_)
+#define pcre2_substring_nametable_scan        PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
+#define pcre2_substring_number_from_name      PCRE2_SUFFIX(pcre2_substring_number_from_name_)
+
+/* Keep this old function name for backwards compatibility */
+#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
+
+/* Keep this obsolete function for backwards compatibility: it is now a noop. */
+#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
+
+/* Now generate all three sets of width-specific structures and function
+prototypes. */
+
+#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
+PCRE2_TYPES_LIST \
+PCRE2_STRUCTURE_LIST \
+PCRE2_GENERAL_INFO_FUNCTIONS \
+PCRE2_GENERAL_CONTEXT_FUNCTIONS \
+PCRE2_COMPILE_CONTEXT_FUNCTIONS \
+PCRE2_CONVERT_CONTEXT_FUNCTIONS \
+PCRE2_CONVERT_FUNCTIONS \
+PCRE2_MATCH_CONTEXT_FUNCTIONS \
+PCRE2_COMPILE_FUNCTIONS \
+PCRE2_PATTERN_INFO_FUNCTIONS \
+PCRE2_MATCH_FUNCTIONS \
+PCRE2_SUBSTRING_FUNCTIONS \
+PCRE2_SERIALIZE_FUNCTIONS \
+PCRE2_SUBSTITUTE_FUNCTION \
+PCRE2_JIT_FUNCTIONS \
+PCRE2_OTHER_FUNCTIONS
+
+#define PCRE2_LOCAL_WIDTH 8
+PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
+#undef PCRE2_LOCAL_WIDTH
+
+#define PCRE2_LOCAL_WIDTH 16
+PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
+#undef PCRE2_LOCAL_WIDTH
+
+#define PCRE2_LOCAL_WIDTH 32
+PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
+#undef PCRE2_LOCAL_WIDTH
+
+/* Undefine the list macros; they are no longer needed. */
+
+#undef PCRE2_TYPES_LIST
+#undef PCRE2_STRUCTURE_LIST
+#undef PCRE2_GENERAL_INFO_FUNCTIONS
+#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
+#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
+#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
+#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
+#undef PCRE2_COMPILE_FUNCTIONS
+#undef PCRE2_PATTERN_INFO_FUNCTIONS
+#undef PCRE2_MATCH_FUNCTIONS
+#undef PCRE2_SUBSTRING_FUNCTIONS
+#undef PCRE2_SERIALIZE_FUNCTIONS
+#undef PCRE2_SUBSTITUTE_FUNCTION
+#undef PCRE2_JIT_FUNCTIONS
+#undef PCRE2_OTHER_FUNCTIONS
+#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
+
+/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
+PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
+PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
+
+#undef PCRE2_SUFFIX
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
+#error Use 8, 16, or 32; or 0 for a multi-width application.
+#else  /* PCRE2_CODE_UNIT_WIDTH is defined */
+#if PCRE2_CODE_UNIT_WIDTH == 8 || \
+    PCRE2_CODE_UNIT_WIDTH == 16 || \
+    PCRE2_CODE_UNIT_WIDTH == 32
+#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
+#elif PCRE2_CODE_UNIT_WIDTH == 0
+#undef PCRE2_JOIN
+#undef PCRE2_GLUE
+#define PCRE2_SUFFIX(a) a
+#else
+#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
+#endif
+#endif  /* PCRE2_CODE_UNIT_WIDTH is defined */
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* PCRE2_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2.h */
--- a/third_party/pcre/pcre2_auto_possess.c
+++ b/third_party/pcre/pcre2_auto_possess.c
--- a/third_party/pcre/pcre2_chartables.c
+++ b/third_party/pcre/pcre2_chartables.c
@ -0,0 +1,202 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* This file was automatically written by the pcre2_dftables auxiliary
+program. It contains character tables that are used when no external
+tables are passed to PCRE2 by the application that calls it. The tables
+are used only for characters whose code values are less than 256. */
+
+/* This set of tables was written in the C locale. */
+
+/* The pcre2_ftables program (which is distributed with PCRE2) can be used
+to build alternative versions of this file. This is necessary if you are
+running in an EBCDIC environment, or if you want to default to a different
+encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
+these tables in the "C" locale by default. This happens automatically if
+PCRE2 is configured with --enable-rebuild-chartables. However, you can run
+pcre2_dftables manually with the -L option to build tables using the LC_ALL
+locale. */
+
+/* The following #include is present because without it gcc 4.x may remove
+the array definition from the final binary if PCRE2 is built into a static
+library and dead code stripping is activated. This leads to link errors.
+Pulling in the header ensures that the array gets flagged as "someone
+outside this compilation unit might reference this" and so it will always
+be supplied to the linker. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+const uint8_t PRIV(default_tables)[] = {
+
+/* This table is a lower casing table. */
+
+    0,  1,  2,  3,  4,  5,  6,  7,
+    8,  9, 10, 11, 12, 13, 14, 15,
+   16, 17, 18, 19, 20, 21, 22, 23,
+   24, 25, 26, 27, 28, 29, 30, 31,
+   32, 33, 34, 35, 36, 37, 38, 39,
+   40, 41, 42, 43, 44, 45, 46, 47,
+   48, 49, 50, 51, 52, 53, 54, 55,
+   56, 57, 58, 59, 60, 61, 62, 63,
+   64, 97, 98, 99,100,101,102,103,
+  104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,
+  120,121,122, 91, 92, 93, 94, 95,
+   96, 97, 98, 99,100,101,102,103,
+  104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,
+  120,121,122,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,
+  136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,
+  152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,
+  168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,
+  184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,
+  200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,
+  216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,
+  232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,
+  248,249,250,251,252,253,254,255,
+
+/* This table is a case flipping table. */
+
+    0,  1,  2,  3,  4,  5,  6,  7,
+    8,  9, 10, 11, 12, 13, 14, 15,
+   16, 17, 18, 19, 20, 21, 22, 23,
+   24, 25, 26, 27, 28, 29, 30, 31,
+   32, 33, 34, 35, 36, 37, 38, 39,
+   40, 41, 42, 43, 44, 45, 46, 47,
+   48, 49, 50, 51, 52, 53, 54, 55,
+   56, 57, 58, 59, 60, 61, 62, 63,
+   64, 97, 98, 99,100,101,102,103,
+  104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,
+  120,121,122, 91, 92, 93, 94, 95,
+   96, 65, 66, 67, 68, 69, 70, 71,
+   72, 73, 74, 75, 76, 77, 78, 79,
+   80, 81, 82, 83, 84, 85, 86, 87,
+   88, 89, 90,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,
+  136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,
+  152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,
+  168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,
+  184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,
+  200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,
+  216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,
+  232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,
+  248,249,250,251,252,253,254,255,
+
+/* This table contains bit maps for various character classes. Each map is 32
+bytes long and the bits run from the least significant end of each byte. The
+classes that have their own maps are: space, xdigit, digit, upper, lower, word,
+graph, print, punct, and cntrl. Other classes are built from combinations. */
+
+  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit */
+  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper */
+  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower */
+  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word */
+  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print */
+  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct */
+  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+
+/* This table identifies various classes of character by individual bits:
+  0x01   white space character
+  0x02   letter
+  0x04   lower case letter
+  0x08   decimal digit
+  0x10   alphanumeric or '_'
+*/
+
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
+  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
+  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
+  0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /*  0 - 7  */
+  0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
+  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  @ - G  */
+  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
+  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
+  0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /*  X - _  */
+  0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  ` - g  */
+  0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  h - o  */
+  0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  p - w  */
+  0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
+
+/* End of pcre2_chartables.c */
--- a/third_party/pcre/pcre2_compile.c
+++ b/third_party/pcre/pcre2_compile.c
--- a/third_party/pcre/pcre2_config.c
+++ b/third_party/pcre/pcre2_config.c
@ -0,0 +1,252 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
+its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
+be in code units. */
+
+static int configured_link_size = LINK_SIZE;
+
+#include "pcre2_internal.h"
+
+/* These macros are the standard way of turning unquoted text into C strings.
+They allow macros like PCRE2_MAJOR to be defined without quotes, which is
+convenient for user programs that want to test their values. */
+
+#define STRING(a)  # a
+#define XSTRING(s) STRING(s)
+
+
+/*************************************************
+* Return info about what features are configured *
+*************************************************/
+
+/* If where is NULL, the length of memory required is returned.
+
+Arguments:
+  what             what information is required
+  where            where to put the information
+
+Returns:           0 if a numerical value is returned
+                   >= 0 if a string value
+                   PCRE2_ERROR_BADOPTION if "where" not recognized
+                     or JIT target requested when JIT not enabled
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_config(uint32_t what, void *where)
+{
+if (where == NULL)  /* Requests a length */
+  {
+  switch(what)
+    {
+    default:
+    return PCRE2_ERROR_BADOPTION;
+
+    case PCRE2_CONFIG_BSR:
+    case PCRE2_CONFIG_COMPILED_WIDTHS:
+    case PCRE2_CONFIG_DEPTHLIMIT:
+    case PCRE2_CONFIG_HEAPLIMIT:
+    case PCRE2_CONFIG_JIT:
+    case PCRE2_CONFIG_LINKSIZE:
+    case PCRE2_CONFIG_MATCHLIMIT:
+    case PCRE2_CONFIG_NEVER_BACKSLASH_C:
+    case PCRE2_CONFIG_NEWLINE:
+    case PCRE2_CONFIG_PARENSLIMIT:
+    case PCRE2_CONFIG_STACKRECURSE:    /* Obsolete */
+    case PCRE2_CONFIG_TABLES_LENGTH:
+    case PCRE2_CONFIG_UNICODE:
+    return sizeof(uint32_t);
+
+    /* These are handled below */
+
+    case PCRE2_CONFIG_JITTARGET:
+    case PCRE2_CONFIG_UNICODE_VERSION:
+    case PCRE2_CONFIG_VERSION:
+    break;
+    }
+  }
+
+switch (what)
+  {
+  default:
+  return PCRE2_ERROR_BADOPTION;
+
+  case PCRE2_CONFIG_BSR:
+#ifdef BSR_ANYCRLF
+  *((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
+#else
+  *((uint32_t *)where) = PCRE2_BSR_UNICODE;
+#endif
+  break;
+
+  case PCRE2_CONFIG_COMPILED_WIDTHS:
+  *((uint32_t *)where) = 0
+#ifdef SUPPORT_PCRE2_8
+  + 1
+#endif
+#ifdef SUPPORT_PCRE2_16
+  + 2
+#endif
+#ifdef SUPPORT_PCRE2_32
+  + 4
+#endif
+  ;
+  break;
+
+  case PCRE2_CONFIG_DEPTHLIMIT:
+  *((uint32_t *)where) = MATCH_LIMIT_DEPTH;
+  break;
+
+  case PCRE2_CONFIG_HEAPLIMIT:
+  *((uint32_t *)where) = HEAP_LIMIT;
+  break;
+
+  case PCRE2_CONFIG_JIT:
+#ifdef SUPPORT_JIT
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  case PCRE2_CONFIG_JITTARGET:
+#ifdef SUPPORT_JIT
+    {
+    const char *v = PRIV(jit_get_target)();
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+    }
+#else
+  return PCRE2_ERROR_BADOPTION;
+#endif
+
+  case PCRE2_CONFIG_LINKSIZE:
+  *((uint32_t *)where) = (uint32_t)configured_link_size;
+  break;
+
+  case PCRE2_CONFIG_MATCHLIMIT:
+  *((uint32_t *)where) = MATCH_LIMIT;
+  break;
+
+  case PCRE2_CONFIG_NEWLINE:
+  *((uint32_t *)where) = NEWLINE_DEFAULT;
+  break;
+
+  case PCRE2_CONFIG_NEVER_BACKSLASH_C:
+#ifdef NEVER_BACKSLASH_C
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  case PCRE2_CONFIG_PARENSLIMIT:
+  *((uint32_t *)where) = PARENS_NEST_LIMIT;
+  break;
+
+  /* This is now obsolete. The stack is no longer used via recursion for
+  handling backtracking in pcre2_match(). */
+
+  case PCRE2_CONFIG_STACKRECURSE:
+  *((uint32_t *)where) = 0;
+  break;
+
+  case PCRE2_CONFIG_TABLES_LENGTH:
+  *((uint32_t *)where) = TABLES_LENGTH;
+  break;
+
+  case PCRE2_CONFIG_UNICODE_VERSION:
+    {
+#if defined SUPPORT_UNICODE
+    const char *v = PRIV(unicode_version);
+#else
+    const char *v = "Unicode not supported";
+#endif
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+   }
+  break;
+
+  case PCRE2_CONFIG_UNICODE:
+#if defined SUPPORT_UNICODE
+  *((uint32_t *)where) = 1;
+#else
+  *((uint32_t *)where) = 0;
+#endif
+  break;
+
+  /* The hackery in setting "v" below is to cope with the case when
+  PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
+  If the second alternative is used in this case, it does not leave a space
+  before the date. On the other hand, if all four macros are put into a single
+  XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
+  There are problems using an "obvious" approach like this:
+
+     XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
+     XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
+
+  because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
+  of STRING(). The C standard states: "If (before argument substitution) any
+  argument consists of no preprocessing tokens, the behavior is undefined." It
+  turns out the gcc treats this case as a single empty string - which is what
+  we really want - but Visual C grumbles about the lack of an argument for the
+  macro. Unfortunately, both are within their rights. As there seems to be no
+  way to test for a macro's value being empty at compile time, we have to
+  resort to a runtime test. */
+
+  case PCRE2_CONFIG_VERSION:
+    {
+    const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
+      XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+      XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
+    return (int)(1 + ((where == NULL)?
+      strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
+    }
+  }
+
+return 0;
+}
+
+/* End of pcre2_config.c */
--- a/third_party/pcre/pcre2_context.c
+++ b/third_party/pcre/pcre2_context.c
@ -0,0 +1,494 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+
+/*************************************************
+*          Default malloc/free functions         *
+*************************************************/
+
+/* Ignore the "user data" argument in each case. */
+
+static void *default_malloc(size_t size, void *data)
+{
+(void)data;
+return malloc(size);
+}
+
+
+static void default_free(void *block, void *data)
+{
+(void)data;
+free(block);
+}
+
+
+
+/*************************************************
+*        Get a block and save memory control     *
+*************************************************/
+
+/* This internal function is called to get a block of memory in which the
+memory control data is to be stored at the start for future use.
+
+Arguments:
+  size        amount of memory required
+  memctl      pointer to a memctl block or NULL
+
+Returns:      pointer to memory or NULL on failure
+*/
+
+extern void *
+PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
+{
+pcre2_memctl *newmemctl;
+void *yield = (memctl == NULL)? malloc(size) :
+  memctl->malloc(size, memctl->memory_data);
+if (yield == NULL) return NULL;
+newmemctl = (pcre2_memctl *)yield;
+if (memctl == NULL)
+  {
+  newmemctl->malloc = default_malloc;
+  newmemctl->free = default_free;
+  newmemctl->memory_data = NULL;
+  }
+else *newmemctl = *memctl;
+return yield;
+}
+
+
+
+/*************************************************
+*          Create and initialize contexts        *
+*************************************************/
+
+/* Initializing for compile and match contexts is done in separate, private
+functions so that these can be called from functions such as pcre2_compile()
+when an external context is not supplied. The initializing functions have an
+option to set up default memory management. */
+
+PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
+pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
+  void (*private_free)(void *, void *), void *memory_data)
+{
+pcre2_general_context *gcontext;
+if (private_malloc == NULL) private_malloc = default_malloc;
+if (private_free == NULL) private_free = default_free;
+gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
+if (gcontext == NULL) return NULL;
+gcontext->memctl.malloc = private_malloc;
+gcontext->memctl.free = private_free;
+gcontext->memctl.memory_data = memory_data;
+return gcontext;
+}
+
+
+/* A default compile context is set up to save having to initialize at run time
+when no context is supplied to the compile function. */
+
+const pcre2_compile_context PRIV(default_compile_context) = {
+  { default_malloc, default_free, NULL },    /* Default memory handling */
+  NULL,                                      /* Stack guard */
+  NULL,                                      /* Stack guard data */
+  PRIV(default_tables),                      /* Character tables */
+  PCRE2_UNSET,                               /* Max pattern length */
+  BSR_DEFAULT,                               /* Backslash R default */
+  NEWLINE_DEFAULT,                           /* Newline convention */
+  PARENS_NEST_LIMIT,                         /* As it says */
+  0 };                                       /* Extra options */
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
+pcre2_compile_context_create(pcre2_general_context *gcontext)
+{
+pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
+if (ccontext == NULL) return NULL;
+*ccontext = PRIV(default_compile_context);
+if (gcontext != NULL)
+  *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
+return ccontext;
+}
+
+
+/* A default match context is set up to save having to initialize at run time
+when no context is supplied to a match function. */
+
+const pcre2_match_context PRIV(default_match_context) = {
+  { default_malloc, default_free, NULL },
+#ifdef SUPPORT_JIT
+  NULL,          /* JIT callback */
+  NULL,          /* JIT callback data */
+#endif
+  NULL,          /* Callout function */
+  NULL,          /* Callout data */
+  NULL,          /* Substitute callout function */
+  NULL,          /* Substitute callout data */
+  PCRE2_UNSET,   /* Offset limit */
+  HEAP_LIMIT,
+  MATCH_LIMIT,
+  MATCH_LIMIT_DEPTH };
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
+pcre2_match_context_create(pcre2_general_context *gcontext)
+{
+pcre2_match_context *mcontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
+if (mcontext == NULL) return NULL;
+*mcontext = PRIV(default_match_context);
+if (gcontext != NULL)
+  *((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
+return mcontext;
+}
+
+
+/* A default convert context is set up to save having to initialize at run time
+when no context is supplied to the convert function. */
+
+const pcre2_convert_context PRIV(default_convert_context) = {
+  { default_malloc, default_free, NULL },    /* Default memory handling */
+#ifdef _WIN32
+  CHAR_BACKSLASH,                            /* Default path separator */
+  CHAR_GRAVE_ACCENT                          /* Default escape character */
+#else  /* Not Windows */
+  CHAR_SLASH,                                /* Default path separator */
+  CHAR_BACKSLASH                             /* Default escape character */
+#endif
+  };
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_create(pcre2_general_context *gcontext)
+{
+pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
+  sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
+if (ccontext == NULL) return NULL;
+*ccontext = PRIV(default_convert_context);
+if (gcontext != NULL)
+  *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
+return ccontext;
+}
+
+
+/*************************************************
+*              Context copy functions            *
+*************************************************/
+
+PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
+pcre2_general_context_copy(pcre2_general_context *gcontext)
+{
+pcre2_general_context *new =
+  gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
+  gcontext->memctl.memory_data);
+if (new == NULL) return NULL;
+memcpy(new, gcontext, sizeof(pcre2_real_general_context));
+return new;
+}
+
+
+PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
+pcre2_compile_context_copy(pcre2_compile_context *ccontext)
+{
+pcre2_compile_context *new =
+  ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
+  ccontext->memctl.memory_data);
+if (new == NULL) return NULL;
+memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
+return new;
+}
+
+
+PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
+pcre2_match_context_copy(pcre2_match_context *mcontext)
+{
+pcre2_match_context *new =
+  mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
+  mcontext->memctl.memory_data);
+if (new == NULL) return NULL;
+memcpy(new, mcontext, sizeof(pcre2_real_match_context));
+return new;
+}
+
+
+
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_copy(pcre2_convert_context *ccontext)
+{
+pcre2_convert_context *new =
+  ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
+  ccontext->memctl.memory_data);
+if (new == NULL) return NULL;
+memcpy(new, ccontext, sizeof(pcre2_real_convert_context));
+return new;
+}
+
+
+/*************************************************
+*              Context free functions            *
+*************************************************/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_general_context_free(pcre2_general_context *gcontext)
+{
+if (gcontext != NULL)
+  gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_compile_context_free(pcre2_compile_context *ccontext)
+{
+if (ccontext != NULL)
+  ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_match_context_free(pcre2_match_context *mcontext)
+{
+if (mcontext != NULL)
+  mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
+}
+
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_convert_context_free(pcre2_convert_context *ccontext)
+{
+if (ccontext != NULL)
+  ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
+}
+
+
+/*************************************************
+*             Set values in contexts             *
+*************************************************/
+
+/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
+data is given. Only some of the functions are able to test the validity of the
+data. */
+
+
+/* ------------ Compile context ------------ */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_character_tables(pcre2_compile_context *ccontext,
+  const uint8_t *tables)
+{
+ccontext->tables = tables;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
+{
+switch(value)
+  {
+  case PCRE2_BSR_ANYCRLF:
+  case PCRE2_BSR_UNICODE:
+  ccontext->bsr_convention = value;
+  return 0;
+
+  default:
+  return PCRE2_ERROR_BADDATA;
+  }
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
+{
+ccontext->max_pattern_length = length;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
+{
+switch(newline)
+  {
+  case PCRE2_NEWLINE_CR:
+  case PCRE2_NEWLINE_LF:
+  case PCRE2_NEWLINE_CRLF:
+  case PCRE2_NEWLINE_ANY:
+  case PCRE2_NEWLINE_ANYCRLF:
+  case PCRE2_NEWLINE_NUL:
+  ccontext->newline_convention = newline;
+  return 0;
+
+  default:
+  return PCRE2_ERROR_BADDATA;
+  }
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
+{
+ccontext->parens_nest_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
+{
+ccontext->extra_options = options;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
+  int (*guard)(uint32_t, void *), void *user_data)
+{
+ccontext->stack_guard = guard;
+ccontext->stack_guard_data = user_data;
+return 0;
+}
+
+
+/* ------------ Match context ------------ */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_callout(pcre2_match_context *mcontext,
+  int (*callout)(pcre2_callout_block *, void *), void *callout_data)
+{
+mcontext->callout = callout;
+mcontext->callout_data = callout_data;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_substitute_callout(pcre2_match_context *mcontext,
+  int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
+    void *substitute_callout_data)
+{
+mcontext->substitute_callout = substitute_callout;
+mcontext->substitute_callout_data = substitute_callout_data;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->heap_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->match_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->depth_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
+{
+mcontext->offset_limit = limit;
+return 0;
+}
+
+/* These functions became obsolete at release 10.30. The first is kept as a
+synonym for backwards compatibility. The second now does nothing. Exclude both
+from coverage reports. */
+
+/* LCOV_EXCL_START */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+return pcre2_set_depth_limit(mcontext, limit);
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
+  void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
+  void *mydata)
+{
+(void)mcontext;
+(void)mymalloc;
+(void)myfree;
+(void)mydata;
+return 0;
+}
+
+/* LCOV_EXCL_STOP */
+
+
+/* ------------ Convert context ------------ */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
+{
+if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
+    separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
+ccontext->glob_separator = separator;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
+{
+if (escape > 255 || (escape != 0 && !ispunct(escape)))
+  return PCRE2_ERROR_BADDATA;
+ccontext->glob_escape = escape;
+return 0;
+}
+
+/* End of pcre2_context.c */
+
--- a/third_party/pcre/pcre2_convert.c
+++ b/third_party/pcre/pcre2_convert.c
--- a/third_party/pcre/pcre2_dfa_match.c
+++ b/third_party/pcre/pcre2_dfa_match.c
--- a/third_party/pcre/pcre2_error.c
+++ b/third_party/pcre/pcre2_error.c
@ -0,0 +1,341 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+#define STRING(a)  # a
+#define XSTRING(s) STRING(s)
+
+/* The texts of compile-time error messages. Compile-time error numbers start
+at COMPILE_ERROR_BASE (100).
+
+This used to be a table of strings, but in order to reduce the number of
+relocations needed when a shared library is loaded dynamically, it is now one
+long string. We cannot use a table of offsets, because the lengths of inserts
+such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
+pcre2_get_error_message() counts through to the one it wants - this isn't a
+performance issue because these strings are used only when there is an error.
+
+Each substring ends with \0 to insert a null character. This includes the final
+substring, so that the whole string ends with \0\0, which can be detected when
+counting through. */
+
+static const unsigned char compile_error_texts[] =
+  "no error\0"
+  "\\ at end of pattern\0"
+  "\\c at end of pattern\0"
+  "unrecognized character follows \\\0"
+  "numbers out of order in {} quantifier\0"
+  /* 5 */
+  "number too big in {} quantifier\0"
+  "missing terminating ] for character class\0"
+  "escape sequence is invalid in character class\0"
+  "range out of order in character class\0"
+  "quantifier does not follow a repeatable item\0"
+  /* 10 */
+  "internal error: unexpected repeat\0"
+  "unrecognized character after (? or (?-\0"
+  "POSIX named classes are supported only within a class\0"
+  "POSIX collating elements are not supported\0"
+  "missing closing parenthesis\0"
+  /* 15 */
+  "reference to non-existent subpattern\0"
+  "pattern passed as NULL\0"
+  "unrecognised compile-time option bit(s)\0"
+  "missing ) after (?# comment\0"
+  "parentheses are too deeply nested\0"
+  /* 20 */
+  "regular expression is too large\0"
+  "failed to allocate heap memory\0"
+  "unmatched closing parenthesis\0"
+  "internal error: code overflow\0"
+  "missing closing parenthesis for condition\0"
+  /* 25 */
+  "lookbehind assertion is not fixed length\0"
+  "a relative value of zero is not allowed\0"
+  "conditional subpattern contains more than two branches\0"
+  "assertion expected after (?( or (?(?C)\0"
+  "digit expected after (?+ or (?-\0"
+  /* 30 */
+  "unknown POSIX class name\0"
+  "internal error in pcre2_study(): should not occur\0"
+  "this version of PCRE2 does not have Unicode support\0"
+  "parentheses are too deeply nested (stack check)\0"
+  "character code point value in \\x{} or \\o{} is too large\0"
+  /* 35 */
+  "lookbehind is too complicated\0"
+  "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
+  "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
+  "number after (?C is greater than 255\0"
+  "closing parenthesis for (?C expected\0"
+  /* 40 */
+  "invalid escape sequence in (*VERB) name\0"
+  "unrecognized character after (?P\0"
+  "syntax error in subpattern name (missing terminator?)\0"
+  "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
+  "subpattern name must start with a non-digit\0"
+  /* 45 */
+  "this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
+  "malformed \\P or \\p sequence\0"
+  "unknown property after \\P or \\p\0"
+  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
+  "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
+  /* 50 */
+  "invalid range in character class\0"
+  "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
+  "internal error: overran compiling workspace\0"
+  "internal error: previously-checked referenced subpattern not found\0"
+  "DEFINE subpattern contains more than one branch\0"
+  /* 55 */
+  "missing opening brace after \\o\0"
+  "internal error: unknown newline setting\0"
+  "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
+  "(?R (recursive pattern call) must be followed by a closing parenthesis\0"
+  /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
+  "obsolete error (should not occur)\0"  /* Was the above */
+  /* 60 */
+  "(*VERB) not recognized or malformed\0"
+  "subpattern number is too big\0"
+  "subpattern name expected\0"
+  "internal error: parsed pattern overflow\0"
+  "non-octal character in \\o{} (closing brace missing?)\0"
+  /* 65 */
+  "different names for subpatterns of the same number are not allowed\0"
+  "(*MARK) must have an argument\0"
+  "non-hex character in \\x{} (closing brace missing?)\0"
+#ifndef EBCDIC
+  "\\c must be followed by a printable ASCII character\0"
+#else
+  "\\c must be followed by a letter or one of [\\]^_?\0"
+#endif
+  "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
+  /* 70 */
+  "internal error: unknown meta code in check_lookbehinds()\0"
+  "\\N is not supported in a class\0"
+  "callout string is too long\0"
+  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
+  "using UTF is disabled by the application\0"
+  /* 75 */
+  "using UCP is disabled by the application\0"
+  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
+  "character code point value in \\u.... sequence is too large\0"
+  "digits missing in \\x{} or \\o{} or \\N{U+}\0"
+  "syntax error or number too big in (?(VERSION condition\0"
+  /* 80 */
+  "internal error: unknown opcode in auto_possessify()\0"
+  "missing terminating delimiter for callout with string argument\0"
+  "unrecognized string delimiter follows (?C\0"
+  "using \\C is disabled by the application\0"
+  "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
+  /* 85 */
+  "using \\C is disabled in this PCRE2 library\0"
+  "regular expression is too complicated\0"
+  "lookbehind assertion is too long\0"
+  "pattern string is longer than the limit set by the application\0"
+  "internal error: unknown code in parsed pattern\0"
+  /* 90 */
+  "internal error: bad code value in parsed_skip()\0"
+  "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
+  "invalid option bits with PCRE2_LITERAL\0"
+  "\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
+  "invalid hyphen in option setting\0"
+  /* 95 */
+  "(*alpha_assertion) not recognized\0"
+  "script runs require Unicode support, which this version of PCRE2 does not have\0"
+  "too many capturing groups (maximum 65535)\0"
+  "atomic assertion expected after (?( or (?(?C)\0"
+  "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
+  ;
+
+/* Match-time and UTF error texts are in the same format. */
+
+static const unsigned char match_error_texts[] =
+  "no error\0"
+  "no match\0"
+  "partial match\0"
+  "UTF-8 error: 1 byte missing at end\0"
+  "UTF-8 error: 2 bytes missing at end\0"
+  /* 5 */
+  "UTF-8 error: 3 bytes missing at end\0"
+  "UTF-8 error: 4 bytes missing at end\0"
+  "UTF-8 error: 5 bytes missing at end\0"
+  "UTF-8 error: byte 2 top bits not 0x80\0"
+  "UTF-8 error: byte 3 top bits not 0x80\0"
+  /* 10 */
+  "UTF-8 error: byte 4 top bits not 0x80\0"
+  "UTF-8 error: byte 5 top bits not 0x80\0"
+  "UTF-8 error: byte 6 top bits not 0x80\0"
+  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
+  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
+  /* 15 */
+  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
+  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
+  "UTF-8 error: overlong 2-byte sequence\0"
+  "UTF-8 error: overlong 3-byte sequence\0"
+  "UTF-8 error: overlong 4-byte sequence\0"
+  /* 20 */
+  "UTF-8 error: overlong 5-byte sequence\0"
+  "UTF-8 error: overlong 6-byte sequence\0"
+  "UTF-8 error: isolated byte with 0x80 bit set\0"
+  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
+  "UTF-16 error: missing low surrogate at end\0"
+  /* 25 */
+  "UTF-16 error: invalid low surrogate\0"
+  "UTF-16 error: isolated low surrogate\0"
+  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
+  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
+  "bad data value\0"
+  /* 30 */
+  "patterns do not all use the same character tables\0"
+  "magic number missing\0"
+  "pattern compiled in wrong mode: 8/16/32-bit error\0"
+  "bad offset value\0"
+  "bad option value\0"
+  /* 35 */
+  "invalid replacement string\0"
+  "bad offset into UTF string\0"
+  "callout error code\0"              /* Never returned by PCRE2 itself */
+  "invalid data in workspace for DFA restart\0"
+  "too much recursion for DFA matching\0"
+  /* 40 */
+  "backreference condition or recursion test is not supported for DFA matching\0"
+  "function is not supported for DFA matching\0"
+  "pattern contains an item that is not supported for DFA matching\0"
+  "workspace size exceeded in DFA matching\0"
+  "internal error - pattern overwritten?\0"
+  /* 45 */
+  "bad JIT option\0"
+  "JIT stack limit reached\0"
+  "match limit exceeded\0"
+  "no more memory\0"
+  "unknown substring\0"
+  /* 50 */
+  "non-unique substring name\0"
+  "NULL argument passed with non-zero length\0"
+  "nested recursion at the same subject position\0"
+  "matching depth limit exceeded\0"
+  "requested value is not available\0"
+  /* 55 */
+  "requested value is not set\0"
+  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
+  "bad escape sequence in replacement string\0"
+  "expected closing curly bracket in replacement string\0"
+  "bad substitution in replacement string\0"
+  /* 60 */
+  "match with end before start or start moved backwards is not supported\0"
+  "too many replacements (more than INT_MAX)\0"
+  "bad serialized data\0"
+  "heap limit exceeded\0"
+  "invalid syntax\0"
+  /* 65 */
+  "internal error - duplicate substitution match\0"
+  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
+  ;
+
+
+/*************************************************
+*            Return error message                *
+*************************************************/
+
+/* This function copies an error message into a buffer whose units are of an
+appropriate width. Error numbers are positive for compile-time errors, and
+negative for match-time errors (except for UTF errors), but the numbers are all
+distinct.
+
+Arguments:
+  enumber       error number
+  buffer        where to put the message (zero terminated)
+  size          size of the buffer in code units
+
+Returns:        length of message if all is well
+                negative on error
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
+{
+const unsigned char *message;
+PCRE2_SIZE i;
+int n;
+
+if (size == 0) return PCRE2_ERROR_NOMEMORY;
+
+if (enumber >= COMPILE_ERROR_BASE)  /* Compile error */
+  {
+  message = compile_error_texts;
+  n = enumber - COMPILE_ERROR_BASE;
+  }
+else if (enumber < 0)               /* Match or UTF error */
+  {
+  message = match_error_texts;
+  n = -enumber;
+  }
+else                                /* Invalid error number */
+  {
+  message = (unsigned char *)"\0";  /* Empty message list */
+  n = 1;
+  }
+
+for (; n > 0; n--)
+  {
+  while (*message++ != CHAR_NUL) {};
+  if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
+  }
+
+for (i = 0; *message != 0; i++)
+  {
+  if (i >= size - 1)
+    {
+    buffer[i] = 0;     /* Terminate partial message */
+    return PCRE2_ERROR_NOMEMORY;
+    }
+  buffer[i] = *message++;
+  }
+
+buffer[i] = 0;
+return (int)i;
+}
+
+/* End of pcre2_error.c */
--- a/third_party/pcre/pcre2_extuni.c
+++ b/third_party/pcre/pcre2_extuni.c
@ -0,0 +1,148 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match a Unicode
+extended grapheme sequence. It is used by both pcre2_match() and
+pcre2_def_match(). However, it is called only when Unicode support is being
+compiled. Nevertheless, we provide a dummy function when there is no Unicode
+support, because some compilers do not like functionless source files. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+
+/* Dummy function */
+
+#ifndef SUPPORT_UNICODE
+PCRE2_SPTR
+PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
+  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
+{
+(void)c;
+(void)eptr;
+(void)start_subject;
+(void)end_subject;
+(void)utf;
+(void)xcount;
+return NULL;
+}
+#else
+
+
+/*************************************************
+*      Match an extended grapheme sequence       *
+*************************************************/
+
+/*
+Arguments:
+  c              the first character
+  eptr           pointer to next character
+  start_subject  pointer to start of subject
+  end_subject    pointer to end of subject
+  utf            TRUE if in UTF mode
+  xcount         pointer to count of additional characters,
+                   or NULL if count not needed
+
+Returns:         pointer after the end of the sequence
+*/
+
+PCRE2_SPTR
+PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
+  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
+{
+int lgb = UCD_GRAPHBREAK(c);
+
+while (eptr < end_subject)
+  {
+  int rgb;
+  int len = 1;
+  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+  rgb = UCD_GRAPHBREAK(c);
+  if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
+    {
+    int ricount = 0;
+    PCRE2_SPTR bptr = eptr - 1;
+    if (utf) BACKCHAR(bptr);
+
+    /* bptr is pointing to the left-hand character */
+
+    while (bptr > start_subject)
+      {
+      bptr--;
+      if (utf)
+        {
+        BACKCHAR(bptr);
+        GETCHAR(c, bptr);
+        }
+      else
+      c = *bptr;
+      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
+      ricount++;
+      }
+    if ((ricount & 1) != 0) break;  /* Grapheme break required */
+    }
+
+  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+  allows any number of them before a following Extended_Pictographic. */
+
+  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+       lgb != ucp_gbExtended_Pictographic)
+    lgb = rgb;
+
+  eptr += len;
+  if (xcount != NULL) *xcount += 1;
+  }
+
+return eptr;
+}
+
+#endif  /* SUPPORT_UNICODE */
+
+/* End of pcre2_extuni.c */
--- a/third_party/pcre/pcre2_find_bracket.c
+++ b/third_party/pcre/pcre2_find_bracket.c
@ -0,0 +1,219 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a single function that scans through a compiled pattern
+until it finds a capturing bracket with the given number, or, if the number is
+negative, an instance of OP_REVERSE for a lookbehind. The function is called
+from pcre2_compile.c and also from pcre2_study.c when finding the minimum
+matching length. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*    Scan compiled regex for specific bracket    *
+*************************************************/
+
+/*
+Arguments:
+  code        points to start of expression
+  utf         TRUE in UTF mode
+  number      the required bracket number or negative to find a lookbehind
+
+Returns:      pointer to the opcode for the bracket, or NULL if not found
+*/
+
+PCRE2_SPTR
+PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
+{
+for (;;)
+  {
+  PCRE2_UCHAR c = *code;
+
+  if (c == OP_END) return NULL;
+
+  /* XCLASS is used for classes that cannot be represented just by a bit map.
+  This includes negated single high-valued characters. CALLOUT_STR is used for
+  callouts with string arguments. In both cases the length in the table is
+  zero; the actual length is stored in the compiled code. */
+
+  if (c == OP_XCLASS) code += GET(code, 1);
+    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
+
+  /* Handle lookbehind */
+
+  else if (c == OP_REVERSE)
+    {
+    if (number < 0) return (PCRE2_UCHAR *)code;
+    code += PRIV(OP_lengths)[c];
+    }
+
+  /* Handle capturing bracket */
+
+  else if (c == OP_CBRA || c == OP_SCBRA ||
+           c == OP_CBRAPOS || c == OP_SCBRAPOS)
+    {
+    int n = (int)GET2(code, 1+LINK_SIZE);
+    if (n == number) return (PCRE2_UCHAR *)code;
+    code += PRIV(OP_lengths)[c];
+    }
+
+  /* Otherwise, we can get the item's length from the table, except that for
+  repeated character types, we have to test for \p and \P, which have an extra
+  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
+  must add in its length. */
+
+  else
+    {
+    switch(c)
+      {
+      case OP_TYPESTAR:
+      case OP_TYPEMINSTAR:
+      case OP_TYPEPLUS:
+      case OP_TYPEMINPLUS:
+      case OP_TYPEQUERY:
+      case OP_TYPEMINQUERY:
+      case OP_TYPEPOSSTAR:
+      case OP_TYPEPOSPLUS:
+      case OP_TYPEPOSQUERY:
+      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+      break;
+
+      case OP_TYPEUPTO:
+      case OP_TYPEMINUPTO:
+      case OP_TYPEEXACT:
+      case OP_TYPEPOSUPTO:
+      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+        code += 2;
+      break;
+
+      case OP_MARK:
+      case OP_COMMIT_ARG:
+      case OP_PRUNE_ARG:
+      case OP_SKIP_ARG:
+      case OP_THEN_ARG:
+      code += code[1];
+      break;
+      }
+
+    /* Add in the fixed length from the table */
+
+    code += PRIV(OP_lengths)[c];
+
+  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
+  followed by a multi-byte character. The length in the table is a minimum, so
+  we have to arrange to skip the extra bytes. */
+
+#ifdef MAYBE_UTF_MULTI
+    if (utf) switch(c)
+      {
+      case OP_CHAR:
+      case OP_CHARI:
+      case OP_NOT:
+      case OP_NOTI:
+      case OP_EXACT:
+      case OP_EXACTI:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
+      case OP_UPTO:
+      case OP_UPTOI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
+      case OP_MINUPTO:
+      case OP_MINUPTOI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
+      case OP_POSUPTO:
+      case OP_POSUPTOI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
+      case OP_STAR:
+      case OP_STARI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
+      case OP_MINSTAR:
+      case OP_MINSTARI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
+      case OP_POSSTAR:
+      case OP_POSSTARI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
+      case OP_PLUS:
+      case OP_PLUSI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
+      case OP_MINPLUS:
+      case OP_MINPLUSI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
+      case OP_POSPLUS:
+      case OP_POSPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
+      case OP_QUERY:
+      case OP_QUERYI:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
+      case OP_MINQUERY:
+      case OP_MINQUERYI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
+      case OP_POSQUERY:
+      case OP_POSQUERYI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
+      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
+      break;
+      }
+#else
+    (void)(utf);  /* Keep compiler happy by referencing function argument */
+#endif  /* MAYBE_UTF_MULTI */
+    }
+  }
+}
+
+/* End of pcre2_find_bracket.c */
--- a/third_party/pcre/pcre2_internal.h
+++ b/third_party/pcre/pcre2_internal.h
--- a/third_party/pcre/pcre2_intmodedep.h
+++ b/third_party/pcre/pcre2_intmodedep.h
@ -0,0 +1,934 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains mode-dependent macro and structure definitions. The
+file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
+These mode-dependent items are kept in a separate file so that they can also be
+#included multiple times for different code unit widths by pcre2test in order
+to have access to the hidden structures at all supported widths.
+
+Some of the mode-dependent macros are required at different widths for
+different parts of the pcre2test code (in particular, the included
+pcre_printint.c file). We undefine them here so that they can be re-defined for
+multiple inclusions. Not all of these are used in pcre2test, but it's easier
+just to undefine them all. */
+
+#undef ACROSSCHAR
+#undef BACKCHAR
+#undef BYTES2CU
+#undef CHMAX_255
+#undef CU2BYTES
+#undef FORWARDCHAR
+#undef FORWARDCHARTEST
+#undef GET
+#undef GET2
+#undef GETCHAR
+#undef GETCHARINC
+#undef GETCHARINCTEST
+#undef GETCHARLEN
+#undef GETCHARLENTEST
+#undef GETCHARTEST
+#undef GET_EXTRALEN
+#undef HAS_EXTRALEN
+#undef IMM2_SIZE
+#undef MAX_255
+#undef MAX_MARK
+#undef MAX_PATTERN_SIZE
+#undef MAX_UTF_SINGLE_CU
+#undef NOT_FIRSTCU
+#undef PUT
+#undef PUT2
+#undef PUT2INC
+#undef PUTCHAR
+#undef PUTINC
+#undef TABLE_GET
+
+
+
+/* -------------------------- MACROS ----------------------------- */
+
+/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
+(always stored in big-endian order in 8-bit mode) by default. These are used,
+for example, to link from the start of a subpattern to its alternatives and its
+end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
+to around 64K, which is big enough for almost everybody. However, I received a
+request for an even bigger limit. For this reason, and also to make the code
+easier to maintain, the storing and loading of offsets from the compiled code
+unit string is now handled by the macros that are defined here.
+
+The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
+values of 3 or 4 are also supported. */
+
+/* ------------------- 8-bit support  ------------------ */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+#if LINK_SIZE == 2
+#define PUT(a,n,d)   \
+  (a[n] = (PCRE2_UCHAR)((d) >> 8)), \
+  (a[(n)+1] = (PCRE2_UCHAR)((d) & 255))
+#define GET(a,n) \
+  (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
+#define MAX_PATTERN_SIZE (1 << 16)
+
+#elif LINK_SIZE == 3
+#define PUT(a,n,d)       \
+  (a[n] = (PCRE2_UCHAR)((d) >> 16)),    \
+  (a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \
+  (a[(n)+2] = (PCRE2_UCHAR)((d) & 255))
+#define GET(a,n) \
+  (unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
+#define MAX_PATTERN_SIZE (1 << 24)
+
+#elif LINK_SIZE == 4
+#define PUT(a,n,d)        \
+  (a[n] = (PCRE2_UCHAR)((d) >> 24)),     \
+  (a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \
+  (a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)),  \
+  (a[(n)+3] = (PCRE2_UCHAR)((d) & 255))
+#define GET(a,n) \
+  (unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
+#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */
+
+#else
+#error LINK_SIZE must be 2, 3, or 4
+#endif
+
+
+/* ------------------- 16-bit support  ------------------ */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+#if LINK_SIZE == 2
+#undef LINK_SIZE
+#define LINK_SIZE 1
+#define PUT(a,n,d)   \
+  (a[n] = (PCRE2_UCHAR)(d))
+#define GET(a,n) \
+  (a[n])
+#define MAX_PATTERN_SIZE (1 << 16)
+
+#elif LINK_SIZE == 3 || LINK_SIZE == 4
+#undef LINK_SIZE
+#define LINK_SIZE 2
+#define PUT(a,n,d)   \
+  (a[n] = (PCRE2_UCHAR)((d) >> 16)), \
+  (a[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
+#define GET(a,n) \
+  (unsigned int)(((a)[n] << 16) | (a)[(n)+1])
+#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
+
+#else
+#error LINK_SIZE must be 2, 3, or 4
+#endif
+
+
+/* ------------------- 32-bit support  ------------------ */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+#undef LINK_SIZE
+#define LINK_SIZE 1
+#define PUT(a,n,d)   \
+  (a[n] = (d))
+#define GET(a,n) \
+  (a[n])
+#define MAX_PATTERN_SIZE (1 << 30)  /* Keep it positive */
+
+#else
+#error Unsupported compiling mode
+#endif
+
+
+/* --------------- Other mode-specific macros ----------------- */
+
+/* PCRE uses some other (at least) 16-bit quantities that do not change when
+the size of offsets changes. There are used for repeat counts and for other
+things such as capturing parenthesis numbers in back references.
+
+Define the number of code units required to hold a 16-bit count/offset, and
+macros to load and store such a value. For reasons that I do not understand,
+the expression in the 8-bit GET2 macro is treated by gcc as a signed
+expression, even when a is declared as unsigned. It seems that any kind of
+arithmetic results in a signed value. Hence the cast. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define IMM2_SIZE 2
+#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
+#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
+
+#else  /* Code units are 16 or 32 bits */
+#define IMM2_SIZE 1
+#define GET2(a,n) a[n]
+#define PUT2(a,n,d) a[n] = d
+#endif
+
+/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
+whether its argument, which is assumed to be one code unit, is less than 256.
+The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
+name must fit in one code unit; currently it is set to 255 or 65535. The
+TABLE_GET macro is used to access elements of tables containing exactly 256
+items. Its argument is a code unit. When code points can be greater than 255, a
+check is needed before accessing these tables. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define MAX_255(c) TRUE
+#define MAX_MARK ((1u << 8) - 1)
+#define TABLE_GET(c, table, default) ((table)[c])
+#ifdef SUPPORT_UNICODE
+#define SUPPORT_WIDE_CHARS
+#define CHMAX_255(c) ((c) <= 255u)
+#else
+#define CHMAX_255(c) TRUE
+#endif  /* SUPPORT_UNICODE */
+
+#else  /* Code units are 16 or 32 bits */
+#define CHMAX_255(c) ((c) <= 255u)
+#define MAX_255(c) ((c) <= 255u)
+#define MAX_MARK ((1u << 16) - 1)
+#define SUPPORT_WIDE_CHARS
+#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
+#endif
+
+
+/* ----------------- Character-handling macros ----------------- */
+
+/* There is a proposed future special "UTF-21" mode, in which only the lowest
+21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
+high-order bits available to the application for other uses. In preparation for
+the future implementation of this mode, there are macros that load a data item
+and, if in this special mode, mask it to 21 bits. These macros all have names
+starting with UCHAR21. In all other modes, including the normal 32-bit
+library, the macros all have the same simple definitions. When the new mode is
+implemented, it is expected that these definitions will be varied appropriately
+using #ifdef when compiling the library that supports the special mode. */
+
+#define UCHAR21(eptr)        (*(eptr))
+#define UCHAR21TEST(eptr)    (*(eptr))
+#define UCHAR21INC(eptr)     (*(eptr)++)
+#define UCHAR21INCTEST(eptr) (*(eptr)++)
+
+/* When UTF encoding is being used, a character is no longer just a single
+byte in 8-bit mode or a single short in 16-bit mode. The macros for character
+handling generate simple sequences when used in the basic mode, and more
+complicated ones for UTF characters. GETCHARLENTEST and other macros are not
+used when UTF is not supported. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
+
+#ifndef SUPPORT_UNICODE
+
+/* #define MAX_UTF_SINGLE_CU */
+/* #define HAS_EXTRALEN(c) */
+/* #define GET_EXTRALEN(c) */
+/* #define NOT_FIRSTCU(c) */
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARTEST(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define GETCHARINCTEST(c, eptr) c = *eptr++;
+#define GETCHARLEN(c, eptr, len) c = *eptr;
+#define PUTCHAR(c, p) (*p = c, 1)
+/* #define GETCHARLENTEST(c, eptr, len) */
+/* #define BACKCHAR(eptr) */
+/* #define FORWARDCHAR(eptr) */
+/* #define FORWARCCHARTEST(eptr,end) */
+/* #define ACROSSCHAR(condition, eptr, action) */
+
+#else   /* SUPPORT_UNICODE */
+
+/* ------------------- 8-bit support  ------------------ */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
+
+/* The largest UTF code point that can be encoded as a single code unit. */
+
+#define MAX_UTF_SINGLE_CU 127
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
+
+/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
+Otherwise it has an undefined behaviour. */
+
+#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
+
+/* Returns TRUE, if the given value is not the first code unit of a UTF
+sequence. */
+
+#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
+
+/* Get the next UTF-8 character, not advancing the pointer. This is called when
+we know we are in UTF-8 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *eptr; \
+  if (c >= 0xc0u) GETUTF8(c, eptr);
+
+/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *eptr; \
+  if (utf && c >= 0xc0u) GETUTF8(c, eptr);
+
+/* Get the next UTF-8 character, advancing the pointer. This is called when we
+know we are in UTF-8 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *eptr++; \
+  if (c >= 0xc0u) GETUTF8INC(c, eptr);
+
+/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-8 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *eptr++; \
+  if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
+
+/* Get the next UTF-8 character, not advancing the pointer, incrementing length
+if there are extra bytes. This is called when we know we are in UTF-8 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  c = *eptr; \
+  if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
+
+/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
+pointer, incrementing length if there are extra bytes. This is called when we
+do not know if we are in UTF-8 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-8 mode - we don't put a test within the macro
+because almost all calls are already within a block of UTF-8 only code. */
+
+#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
+#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) \
+  while((condition) && ((*eptr) & 0xc0u) == 0x80u) action
+
+/* Deposit a character into memory, returning the number of code units. */
+
+#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
+  PRIV(ord2utf)(c,p) : (*p = c, 1))
+
+
+/* ------------------- 16-bit support  ------------------ */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+#define MAYBE_UTF_MULTI          /* UTF chars may use multiple code units */
+
+/* The largest UTF code point that can be encoded as a single code unit. */
+
+#define MAX_UTF_SINGLE_CU 65535
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
+
+/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
+Otherwise it has an undefined behaviour. */
+
+#define GET_EXTRALEN(c) 1
+
+/* Returns TRUE, if the given value is not the first code unit of a UTF
+sequence. */
+
+#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer. */
+
+#define GETUTF16(c, eptr) \
+   { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
+
+/* Get the next UTF-16 character, not advancing the pointer. This is called when
+we know we are in UTF-16 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *eptr; \
+  if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
+
+/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
+the pointer. */
+
+#define GETUTF16INC(c, eptr) \
+   { c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
+
+/* Get the next UTF-16 character, advancing the pointer. This is called when we
+know we are in UTF-16 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *eptr++; \
+  if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
+
+/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-16 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *eptr++; \
+  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer, incrementing the length. */
+
+#define GETUTF16LEN(c, eptr, len) \
+   { c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
+
+/* Get the next UTF-16 character, not advancing the pointer, incrementing
+length if there is a low surrogate. This is called when we know we are in
+UTF-16 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  c = *eptr; \
+  if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
+
+/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
+pointer, incrementing length if there is a low surrogate. This is called when
+we do not know if we are in UTF-16 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-16 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-16 only
+code. */
+
+#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
+#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) \
+  if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action
+
+/* Deposit a character into memory, returning the number of code units. */
+
+#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
+  PRIV(ord2utf)(c,p) : (*p = c, 1))
+
+
+/* ------------------- 32-bit support  ------------------ */
+
+#else
+
+/* These are trivial for the 32-bit library, since all UTF-32 characters fit
+into one PCRE2_UCHAR unit. */
+
+#define MAX_UTF_SINGLE_CU (0x10ffffu)
+#define HAS_EXTRALEN(c) (0)
+#define GET_EXTRALEN(c) (0)
+#define NOT_FIRSTCU(c) (0)
+
+/* Get the next UTF-32 character, not advancing the pointer. This is called when
+we know we are in UTF-32 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *(eptr);
+
+/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *(eptr);
+
+/* Get the next UTF-32 character, advancing the pointer. This is called when we
+know we are in UTF-32 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *((eptr)++);
+
+/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-32 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *((eptr)++);
+
+/* Get the next UTF-32 character, not advancing the pointer, not incrementing
+length (since all UTF-32 is of length 1). This is called when we know we are in
+UTF-32 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  GETCHAR(c, eptr)
+
+/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the
+pointer, not incrementing the length (since all UTF-32 is of length 1).
+This is called when we do not know if we are in UTF-32 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  GETCHARTEST(c, eptr)
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-32 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-32 only
+code.
+
+These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
+
+#define BACKCHAR(eptr) do { } while (0)
+
+/* Same as above, just in the other direction. */
+
+#define FORWARDCHAR(eptr) do { } while (0)
+#define FORWARDCHARTEST(eptr,end) do { } while (0)
+
+/* Same as above, but it allows a fully customizable form. */
+
+#define ACROSSCHAR(condition, eptr, action) do { } while (0)
+
+/* Deposit a character into memory, returning the number of code units. */
+
+#define PUTCHAR(c, p) (*p = c, 1)
+
+#endif  /* UTF-32 character handling */
+#endif  /* SUPPORT_UNICODE */
+
+
+/* Mode-dependent macros that have the same definition in all modes. */
+
+#define CU2BYTES(x)     ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
+#define BYTES2CU(x)     ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
+#define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE
+#define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
+
+
+/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
+
+/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
+code that uses them is simpler because it assumes this. */
+
+/* The real general context structure. At present it holds only data for custom
+memory control. */
+
+typedef struct pcre2_real_general_context {
+  pcre2_memctl memctl;
+} pcre2_real_general_context;
+
+/* The real compile context structure */
+
+typedef struct pcre2_real_compile_context {
+  pcre2_memctl memctl;
+  int (*stack_guard)(uint32_t, void *);
+  void *stack_guard_data;
+  const uint8_t *tables;
+  PCRE2_SIZE max_pattern_length;
+  uint16_t bsr_convention;
+  uint16_t newline_convention;
+  uint32_t parens_nest_limit;
+  uint32_t extra_options;
+} pcre2_real_compile_context;
+
+/* The real match context structure. */
+
+typedef struct pcre2_real_match_context {
+  pcre2_memctl memctl;
+#ifdef SUPPORT_JIT
+  pcre2_jit_callback jit_callback;
+  void *jit_callback_data;
+#endif
+  int    (*callout)(pcre2_callout_block *, void *);
+  void    *callout_data;
+  int    (*substitute_callout)(pcre2_substitute_callout_block *, void *);
+  void    *substitute_callout_data;
+  PCRE2_SIZE offset_limit;
+  uint32_t heap_limit;
+  uint32_t match_limit;
+  uint32_t depth_limit;
+} pcre2_real_match_context;
+
+/* The real convert context structure. */
+
+typedef struct pcre2_real_convert_context {
+  pcre2_memctl memctl;
+  uint32_t glob_separator;
+  uint32_t glob_escape;
+} pcre2_real_convert_context;
+
+/* The real compiled code structure. The type for the blocksize field is
+defined specially because it is required in pcre2_serialize_decode() when
+copying the size from possibly unaligned memory into a variable of the same
+type. Use a macro rather than a typedef to avoid compiler warnings when this
+file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
+largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
+argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
+here.) */
+
+#undef  CODE_BLOCKSIZE_TYPE
+#define CODE_BLOCKSIZE_TYPE size_t
+
+#undef  LOOKBEHIND_MAX
+#define LOOKBEHIND_MAX UINT16_MAX
+
+typedef struct pcre2_real_code {
+  pcre2_memctl memctl;            /* Memory control fields */
+  const uint8_t *tables;          /* The character tables */
+  void    *executable_jit;        /* Pointer to JIT code */
+  uint8_t  start_bitmap[32];      /* Bitmap for starting code unit < 256 */
+  CODE_BLOCKSIZE_TYPE blocksize;  /* Total (bytes) that was malloc-ed */
+  uint32_t magic_number;          /* Paranoid and endianness check */
+  uint32_t compile_options;       /* Options passed to pcre2_compile() */
+  uint32_t overall_options;       /* Options after processing the pattern */
+  uint32_t extra_options;         /* Taken from compile_context */
+  uint32_t flags;                 /* Various state flags */
+  uint32_t limit_heap;            /* Limit set in the pattern */
+  uint32_t limit_match;           /* Limit set in the pattern */
+  uint32_t limit_depth;           /* Limit set in the pattern */
+  uint32_t first_codeunit;        /* Starting code unit */
+  uint32_t last_codeunit;         /* This codeunit must be seen */
+  uint16_t bsr_convention;        /* What \R matches */
+  uint16_t newline_convention;    /* What is a newline? */
+  uint16_t max_lookbehind;        /* Longest lookbehind (characters) */
+  uint16_t minlength;             /* Minimum length of match */
+  uint16_t top_bracket;           /* Highest numbered group */
+  uint16_t top_backref;           /* Highest numbered back reference */
+  uint16_t name_entry_size;       /* Size (code units) of table entries */
+  uint16_t name_count;            /* Number of name entries in the table */
+} pcre2_real_code;
+
+/* The real match data structure. Define ovector as large as it can ever
+actually be so that array bound checkers don't grumble. Memory for this
+structure is obtained by calling pcre2_match_data_create(), which sets the size
+as the offset of ovector plus a pair of elements for each capturable string, so
+the size varies from call to call. As the maximum number of capturing
+subpatterns is 65535 we must allow for 65536 strings to include the overall
+match. (See also the heapframe structure below.) */
+
+struct heapframe;  /* Forward reference */
+
+typedef struct pcre2_real_match_data {
+  pcre2_memctl     memctl;           /* Memory control fields */
+  const pcre2_real_code *code;       /* The pattern used for the match */
+  PCRE2_SPTR       subject;          /* The subject that was matched */
+  PCRE2_SPTR       mark;             /* Pointer to last mark */
+  struct heapframe *heapframes;      /* Backtracking frames heap memory */
+  PCRE2_SIZE       heapframes_size;  /* Malloc-ed size */
+  PCRE2_SIZE       leftchar;         /* Offset to leftmost code unit */
+  PCRE2_SIZE       rightchar;        /* Offset to rightmost code unit */
+  PCRE2_SIZE       startchar;        /* Offset to starting code unit */
+  uint8_t          matchedby;        /* Type of match (normal, JIT, DFA) */
+  uint8_t          flags;            /* Various flags */
+  uint16_t         oveccount;        /* Number of pairs */
+  int              rc;               /* The return code from the match */
+  PCRE2_SIZE       ovector[131072];  /* Must be last in the structure */
+} pcre2_real_match_data;
+
+
+/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
+
+/* These structures are not needed for pcre2test. */
+
+#ifndef PCRE2_PCRE2TEST
+
+/* Structures for checking for mutual recursion when scanning compiled or
+parsed code. */
+
+typedef struct recurse_check {
+  struct recurse_check *prev;
+  PCRE2_SPTR group;
+} recurse_check;
+
+typedef struct parsed_recurse_check {
+  struct parsed_recurse_check *prev;
+  uint32_t *groupptr;
+} parsed_recurse_check;
+
+/* Structure for building a cache when filling in recursion offsets. */
+
+typedef struct recurse_cache {
+  PCRE2_SPTR group;
+  int groupnumber;
+} recurse_cache;
+
+/* Structure for maintaining a chain of pointers to the currently incomplete
+branches, for testing for left recursion while compiling. */
+
+typedef struct branch_chain {
+  struct branch_chain *outer;
+  PCRE2_UCHAR *current_branch;
+} branch_chain;
+
+/* Structure for building a list of named groups during the first pass of
+compiling. */
+
+typedef struct named_group {
+  PCRE2_SPTR   name;          /* Points to the name in the pattern */
+  uint32_t     number;        /* Group number */
+  uint16_t     length;        /* Length of the name */
+  uint16_t     isdup;         /* TRUE if a duplicate */
+} named_group;
+
+/* Structure for passing "static" information around between the functions
+doing the compiling, so that they are thread-safe. */
+
+typedef struct compile_block {
+  pcre2_real_compile_context *cx;  /* Points to the compile context */
+  const uint8_t *lcc;              /* Points to lower casing table */
+  const uint8_t *fcc;              /* Points to case-flipping table */
+  const uint8_t *cbits;            /* Points to character type table */
+  const uint8_t *ctypes;           /* Points to table of type maps */
+  PCRE2_SPTR start_workspace;      /* The start of working space */
+  PCRE2_SPTR start_code;           /* The start of the compiled code */
+  PCRE2_SPTR start_pattern;        /* The start of the pattern */
+  PCRE2_SPTR end_pattern;          /* The end of the pattern */
+  PCRE2_UCHAR *name_table;         /* The name/number table */
+  PCRE2_SIZE workspace_size;       /* Size of workspace */
+  PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
+  PCRE2_SIZE erroroffset;          /* Offset of error in pattern */
+  uint16_t names_found;            /* Number of entries so far */
+  uint16_t name_entry_size;        /* Size of each entry */
+  uint16_t parens_depth;           /* Depth of nested parentheses */
+  uint16_t assert_depth;           /* Depth of nested assertions */
+  open_capitem *open_caps;         /* Chain of open capture items */
+  named_group *named_groups;       /* Points to vector in pre-compile */
+  uint32_t named_group_list_size;  /* Number of entries in the list */
+  uint32_t external_options;       /* External (initial) options */
+  uint32_t external_flags;         /* External flag bits to be set */
+  uint32_t bracount;               /* Count of capturing parentheses */
+  uint32_t lastcapture;            /* Last capture encountered */
+  uint32_t *parsed_pattern;        /* Parsed pattern buffer */
+  uint32_t *parsed_pattern_end;    /* Parsed pattern should not get here */
+  uint32_t *groupinfo;             /* Group info vector */
+  uint32_t top_backref;            /* Maximum back reference */
+  uint32_t backref_map;            /* Bitmap of low back refs */
+  uint32_t nltype;                 /* Newline type */
+  uint32_t nllen;                  /* Newline string length */
+  uint32_t class_range_start;      /* Overall class range start */
+  uint32_t class_range_end;        /* Overall class range end */
+  PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
+  uint32_t req_varyopt;            /* "After variable item" flag for reqbyte */
+  int  max_lookbehind;             /* Maximum lookbehind (characters) */
+  BOOL had_accept;                 /* (*ACCEPT) encountered */
+  BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
+  BOOL had_recurse;                /* Had a recursion or subroutine call */
+  BOOL dupnames;                   /* Duplicate names exist */
+} compile_block;
+
+/* Structure for keeping the properties of the in-memory stack used
+by the JIT matcher. */
+
+typedef struct pcre2_real_jit_stack {
+  pcre2_memctl memctl;
+  void* stack;
+} pcre2_real_jit_stack;
+
+/* Structure for items in a linked list that represents an explicit recursive
+call within the pattern when running pcre2_dfa_match(). */
+
+typedef struct dfa_recursion_info {
+  struct dfa_recursion_info *prevrec;
+  PCRE2_SPTR subject_position;
+  uint32_t group_num;
+} dfa_recursion_info;
+
+/* Structure for "stack" frames that are used for remembering backtracking
+positions during matching. As these are used in a vector, with the ovector item
+being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
+only way to check this at compile time is to force an error by generating an
+array with a negative size. By putting this in a typedef (which is never used),
+we don't generate any code when all is well. */
+
+typedef struct heapframe {
+
+  /* The first set of fields are variables that have to be preserved over calls
+  to RRMATCH(), but which do not need to be copied to new frames. */
+
+  PCRE2_SPTR ecode;          /* The current position in the pattern */
+  PCRE2_SPTR temp_sptr[2];   /* Used for short-term PCRE_SPTR values */
+  PCRE2_SIZE length;         /* Used for character, string, or code lengths */
+  PCRE2_SIZE back_frame;     /* Amount to subtract on RRETURN */
+  PCRE2_SIZE temp_size;      /* Used for short-term PCRE2_SIZE values */
+  uint32_t rdepth;           /* "Recursion" depth */
+  uint32_t group_frame_type; /* Type information for group frames */
+  uint32_t temp_32[4];       /* Used for short-term 32-bit or BOOL values */
+  uint8_t return_id;         /* Where to go on in internal "return" */
+  uint8_t op;                /* Processing opcode */
+
+  /* At this point, the structure is 16-bit aligned. On most architectures
+  the alignment requirement for a pointer will ensure that the eptr field below
+  is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
+  that is 16-bit aligned. We must therefore ensure that what comes between here
+  and eptr is an odd multiple of 16 bits so as to get back into 32-bit
+  alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
+  fudges in the other cases. In the 32-bit case the padding comes first so that
+  the occu field itself is 32-bit aligned. Without the padding, this structure
+  is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  PCRE2_UCHAR occu[6];       /* Used for other case code units */
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+  PCRE2_UCHAR occu[2];       /* Used for other case code units */
+  uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
+#else
+  uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
+  PCRE2_UCHAR occu[1];       /* Used for other case code units */
+#endif
+
+  /* The rest have to be copied from the previous frame whenever a new frame
+  becomes current. The final field is specified as a large vector so that
+  runtime array bound checks don't catch references to it. However, for any
+  specific call to pcre2_match() the memory allocated for each frame structure
+  allows for exactly the right size ovector for the number of capturing
+  parentheses. (See also the comment for pcre2_real_match_data above.) */
+
+  PCRE2_SPTR eptr;           /* MUST BE FIRST */
+  PCRE2_SPTR start_match;    /* Can be adjusted by \K */
+  PCRE2_SPTR mark;           /* Most recent mark on the success path */
+  uint32_t current_recurse;  /* Current (deepest) recursion number */
+  uint32_t capture_last;     /* Most recent capture */
+  PCRE2_SIZE last_group_offset;  /* Saved offset to most recent group frame */
+  PCRE2_SIZE offset_top;     /* Offset after highest capture */
+  PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
+} heapframe;
+
+/* This typedef is a check that the size of the heapframe structure is a
+multiple of PCRE2_SIZE. See various comments above. */
+
+typedef char check_heapframe_size[
+  ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
+
+/* Structure for computing the alignment of heapframe. */
+
+typedef struct heapframe_align {
+  char unalign;    /* Completely unalign the current offset */
+  heapframe frame; /* Offset is its alignment */
+} heapframe_align;
+
+/* This define is the minimum alignment required for a heapframe, in bytes. */
+
+#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
+
+/* Structure for passing "static" information around between the functions
+doing traditional NFA matching (pcre2_match() and friends). */
+
+typedef struct match_block {
+  pcre2_memctl memctl;            /* For general use */
+  PCRE2_SIZE heap_limit;          /* As it says */
+  uint32_t match_limit;           /* As it says */
+  uint32_t match_limit_depth;     /* As it says */
+  uint32_t match_call_count;      /* Number of times a new frame is created */
+  BOOL hitend;                    /* Hit the end of the subject at some point */
+  BOOL hasthen;                   /* Pattern contains (*THEN) */
+  BOOL allowemptypartial;         /* Allow empty hard partial */
+  const uint8_t *lcc;             /* Points to lower casing table */
+  const uint8_t *fcc;             /* Points to case-flipping table */
+  const uint8_t *ctypes;          /* Points to table of type maps */
+  PCRE2_SIZE start_offset;        /* The start offset value */
+  PCRE2_SIZE end_offset_top;      /* Highwater mark at end of match */
+  uint16_t partial;               /* PARTIAL options */
+  uint16_t bsr_convention;        /* \R interpretation */
+  uint16_t name_count;            /* Number of names in name table */
+  uint16_t name_entry_size;       /* Size of entry in names table */
+  PCRE2_SPTR name_table;          /* Table of group names */
+  PCRE2_SPTR start_code;          /* For use when recursing */
+  PCRE2_SPTR start_subject;       /* Start of the subject string */
+  PCRE2_SPTR check_subject;       /* Where UTF-checked from */
+  PCRE2_SPTR end_subject;         /* End of the subject string */
+  PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
+  PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
+  PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
+  PCRE2_SPTR mark;                /* Mark pointer to pass back on success */
+  PCRE2_SPTR nomatch_mark;        /* Mark pointer to pass back on failure */
+  PCRE2_SPTR verb_ecode_ptr;      /* For passing back info */
+  PCRE2_SPTR verb_skip_ptr;       /* For passing back a (*SKIP) name */
+  uint32_t verb_current_recurse;  /* Current recurse when (*VERB) happens */
+  uint32_t moptions;              /* Match options */
+  uint32_t poptions;              /* Pattern options */
+  uint32_t skip_arg_count;        /* For counting SKIP_ARGs */
+  uint32_t ignore_skip_arg;       /* For re-run when SKIP arg name not found */
+  uint32_t nltype;                /* Newline type */
+  uint32_t nllen;                 /* Newline string length */
+  PCRE2_UCHAR nl[4];              /* Newline string when fixed */
+  pcre2_callout_block *cb;        /* Points to a callout block */
+  void  *callout_data;            /* To pass back to callouts */
+  int (*callout)(pcre2_callout_block *,void *);  /* Callout function or NULL */
+} match_block;
+
+/* A similar structure is used for the same purpose by the DFA matching
+functions. */
+
+typedef struct dfa_match_block {
+  pcre2_memctl memctl;            /* For general use */
+  PCRE2_SPTR start_code;          /* Start of the compiled pattern */
+  PCRE2_SPTR start_subject ;      /* Start of the subject string */
+  PCRE2_SPTR end_subject;         /* End of subject string */
+  PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
+  PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
+  const uint8_t *tables;          /* Character tables */
+  PCRE2_SIZE start_offset;        /* The start offset value */
+  PCRE2_SIZE heap_limit;          /* As it says */
+  PCRE2_SIZE heap_used;           /* As it says */
+  uint32_t match_limit;           /* As it says */
+  uint32_t match_limit_depth;     /* As it says */
+  uint32_t match_call_count;      /* Number of calls of internal function */
+  uint32_t moptions;              /* Match options */
+  uint32_t poptions;              /* Pattern options */
+  uint32_t nltype;                /* Newline type */
+  uint32_t nllen;                 /* Newline string length */
+  BOOL allowemptypartial;         /* Allow empty hard partial */
+  PCRE2_UCHAR nl[4];              /* Newline string when fixed */
+  uint16_t bsr_convention;        /* \R interpretation */
+  pcre2_callout_block *cb;        /* Points to a callout block */
+  void *callout_data;             /* To pass back to callouts */
+  int (*callout)(pcre2_callout_block *,void *);  /* Callout function or NULL */
+  dfa_recursion_info *recursive;  /* Linked list of recursion data */
+} dfa_match_block;
+
+#endif  /* PCRE2_PCRE2TEST */
+
+/* End of pcre2_intmodedep.h */
--- a/third_party/pcre/pcre2_maketables.c
+++ b/third_party/pcre/pcre2_maketables.c
@ -0,0 +1,163 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains the external function pcre2_maketables(), which builds
+character tables for PCRE2 in the current locale. The file is compiled on its
+own as part of the PCRE2 library. It is also included in the compilation of
+pcre2_dftables.c as a freestanding program, in which case the macro
+PCRE2_DFTABLES is defined. */
+
+#ifndef PCRE2_DFTABLES    /* Compiling the library */
+#  ifdef HAVE_CONFIG_H
+#  include "config.h"
+#  endif
+#  include "pcre2_internal.h"
+#endif
+
+
+
+/*************************************************
+*           Create PCRE2 character tables        *
+*************************************************/
+
+/* This function builds a set of character tables for use by PCRE2 and returns
+a pointer to them. They are build using the ctype functions, and consequently
+their contents will depend upon the current locale setting. When compiled as
+part of the library, the store is obtained via a general context malloc, if
+supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
+freestanding auxiliary program) malloc() is used, and the function has a
+different name so as not to clash with the prototype in pcre2.h.
+
+Arguments:   none when PCRE2_DFTABLES is defined
+               else a PCRE2 general context or NULL
+Returns:     pointer to the contiguous block of data
+               else NULL if memory allocation failed
+*/
+
+#ifdef PCRE2_DFTABLES  /* Included in freestanding pcre2_dftables program */
+static const uint8_t *maketables(void)
+{
+uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
+
+#else  /* Not PCRE2_DFTABLES, that is, compiling the library */
+PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
+pcre2_maketables(pcre2_general_context *gcontext)
+{
+uint8_t *yield = (uint8_t *)((gcontext != NULL)?
+  gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
+  malloc(TABLES_LENGTH));
+#endif  /* PCRE2_DFTABLES */
+
+int i;
+uint8_t *p;
+
+if (yield == NULL) return NULL;
+p = yield;
+
+/* First comes the lower casing table */
+
+for (i = 0; i < 256; i++) *p++ = tolower(i);
+
+/* Next the case-flipping table */
+
+for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
+
+/* Then the character class tables. Don't try to be clever and save effort on
+exclusive ones - in some locales things may be different.
+
+Note that the table for "space" includes everything "isspace" gives, including
+VT in the default locale. This makes it work for the POSIX class [:space:].
+From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
+space, because Perl added VT at release 5.18.
+
+Note also that it is possible for a character to be alnum or alpha without
+being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
+fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
+test for alnum specially. */
+
+memset(p, 0, cbit_length);
+for (i = 0; i < 256; i++)
+  {
+  if (isdigit(i))  p[cbit_digit  + i/8] |= 1u << (i&7);
+  if (isupper(i))  p[cbit_upper  + i/8] |= 1u << (i&7);
+  if (islower(i))  p[cbit_lower  + i/8] |= 1u << (i&7);
+  if (isalnum(i))  p[cbit_word   + i/8] |= 1u << (i&7);
+  if (i == '_')    p[cbit_word   + i/8] |= 1u << (i&7);
+  if (isspace(i))  p[cbit_space  + i/8] |= 1u << (i&7);
+  if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
+  if (isgraph(i))  p[cbit_graph  + i/8] |= 1u << (i&7);
+  if (isprint(i))  p[cbit_print  + i/8] |= 1u << (i&7);
+  if (ispunct(i))  p[cbit_punct  + i/8] |= 1u << (i&7);
+  if (iscntrl(i))  p[cbit_cntrl  + i/8] |= 1u << (i&7);
+  }
+p += cbit_length;
+
+/* Finally, the character type table. In this, we used to exclude VT from the
+white space chars, because Perl didn't recognize it as such for \s and for
+comments within regexes. However, Perl changed at release 5.18, so PCRE1
+changed at release 8.34 and it's always been this way for PCRE2. */
+
+for (i = 0; i < 256; i++)
+  {
+  int x = 0;
+  if (isspace(i)) x += ctype_space;
+  if (isalpha(i)) x += ctype_letter;
+  if (islower(i)) x += ctype_lcletter;
+  if (isdigit(i)) x += ctype_digit;
+  if (isalnum(i) || i == '_') x += ctype_word;
+  *p++ = x;
+  }
+
+return yield;
+}
+
+#ifndef PCRE2_DFTABLES   /* Compiling the library */
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
+{
+  if (gcontext)
+    gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
+  else
+    free((void *)tables);
+}
+#endif
+
+/* End of pcre2_maketables.c */
--- a/third_party/pcre/pcre2_match.c
+++ b/third_party/pcre/pcre2_match.c
--- a/third_party/pcre/pcre2_match_data.c
+++ b/third_party/pcre/pcre2_match_data.c
@ -0,0 +1,173 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+
+/*************************************************
+*  Create a match data block given ovector size  *
+*************************************************/
+
+/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
+imposed because the oveccount field in a match data block is uintt6_t. */
+
+PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
+pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
+{
+pcre2_match_data *yield;
+if (oveccount < 1) oveccount = 1;
+if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;
+yield = PRIV(memctl_malloc)(
+  offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
+  (pcre2_memctl *)gcontext);
+if (yield == NULL) return NULL;
+yield->oveccount = oveccount;
+yield->flags = 0;
+yield->heapframes = NULL;
+yield->heapframes_size = 0;
+return yield;
+}
+
+
+
+/*************************************************
+*  Create a match data block using pattern data  *
+*************************************************/
+
+/* If no context is supplied, use the memory allocator from the code. */
+
+PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
+pcre2_match_data_create_from_pattern(const pcre2_code *code,
+  pcre2_general_context *gcontext)
+{
+if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
+return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
+  gcontext);
+}
+
+
+
+/*************************************************
+*            Free a match data block             *
+*************************************************/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_match_data_free(pcre2_match_data *match_data)
+{
+if (match_data != NULL)
+  {
+  if (match_data->heapframes != NULL)
+    match_data->memctl.free(match_data->heapframes,
+      match_data->memctl.memory_data);
+  if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
+    match_data->memctl.free((void *)match_data->subject,
+      match_data->memctl.memory_data);
+  match_data->memctl.free(match_data, match_data->memctl.memory_data);
+  }
+}
+
+
+
+/*************************************************
+*         Get last mark in match                 *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
+pcre2_get_mark(pcre2_match_data *match_data)
+{
+return match_data->mark;
+}
+
+
+
+/*************************************************
+*          Get pointer to ovector                *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
+pcre2_get_ovector_pointer(pcre2_match_data *match_data)
+{
+return match_data->ovector;
+}
+
+
+
+/*************************************************
+*          Get number of ovector slots           *
+*************************************************/
+
+PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
+pcre2_get_ovector_count(pcre2_match_data *match_data)
+{
+return match_data->oveccount;
+}
+
+
+
+/*************************************************
+*         Get starting code unit in match        *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
+pcre2_get_startchar(pcre2_match_data *match_data)
+{
+return match_data->startchar;
+}
+
+
+
+/*************************************************
+*         Get size of match data block           *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
+pcre2_get_match_data_size(pcre2_match_data *match_data)
+{
+return offsetof(pcre2_match_data, ovector) +
+  2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
+}
+
+/* End of pcre2_match_data.c */
--- a/third_party/pcre/pcre2_newline.c
+++ b/third_party/pcre/pcre2_newline.c
@ -0,0 +1,243 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2016 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains internal functions for testing newlines when more than
+one kind of newline is to be recognized. When a newline is found, its length is
+returned. In principle, we could implement several newline "types", each
+referring to a different set of newline characters. At present, PCRE2 supports
+only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
+and NLTYPE_ANY. The full list of Unicode newline characters is taken from
+http://unicode.org/unicode/reports/tr18/. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+
+/*************************************************
+*      Check for newline at given position       *
+*************************************************/
+
+/* This function is called only via the IS_NEWLINE macro, which does so only
+when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
+newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
+pointed to by ptr is less than the end of the string.
+
+Arguments:
+  ptr          pointer to possible newline
+  type         the newline type
+  endptr       pointer to the end of the string
+  lenptr       where to return the length
+  utf          TRUE if in utf mode
+
+Returns:       TRUE or FALSE
+*/
+
+BOOL
+PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
+  uint32_t *lenptr, BOOL utf)
+{
+uint32_t c;
+
+#ifdef SUPPORT_UNICODE
+if (utf) { GETCHAR(c, ptr); } else c = *ptr;
+#else
+(void)utf;
+c = *ptr;
+#endif  /* SUPPORT_UNICODE */
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case CHAR_LF:
+  *lenptr = 1;
+  return TRUE;
+
+  case CHAR_CR:
+  *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+  return TRUE;
+
+  default:
+  return FALSE;
+  }
+
+/* NLTYPE_ANY */
+
+else switch(c)
+  {
+#ifdef EBCDIC
+  case CHAR_NEL:
+#endif
+  case CHAR_LF:
+  case CHAR_VT:
+  case CHAR_FF:
+  *lenptr = 1;
+  return TRUE;
+
+  case CHAR_CR:
+  *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
+  return TRUE;
+
+#ifndef EBCDIC
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  case CHAR_NEL:
+  *lenptr = utf? 2 : 1;
+  return TRUE;
+
+  case 0x2028:   /* LS */
+  case 0x2029:   /* PS */
+  *lenptr = 3;
+  return TRUE;
+
+#else  /* 16-bit or 32-bit code units */
+  case CHAR_NEL:
+  case 0x2028:   /* LS */
+  case 0x2029:   /* PS */
+  *lenptr = 1;
+  return TRUE;
+#endif
+#endif /* Not EBCDIC */
+
+  default:
+  return FALSE;
+  }
+}
+
+
+
+/*************************************************
+*     Check for newline at previous position     *
+*************************************************/
+
+/* This function is called only via the WAS_NEWLINE macro, which does so only
+when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
+newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
+value of ptr is greater than the start of the string that is being processed.
+
+Arguments:
+  ptr          pointer to possible newline
+  type         the newline type
+  startptr     pointer to the start of the string
+  lenptr       where to return the length
+  utf          TRUE if in utf mode
+
+Returns:       TRUE or FALSE
+*/
+
+BOOL
+PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
+  uint32_t *lenptr, BOOL utf)
+{
+uint32_t c;
+ptr--;
+
+#ifdef SUPPORT_UNICODE
+if (utf)
+  {
+  BACKCHAR(ptr);
+  GETCHAR(c, ptr);
+  }
+else c = *ptr;
+#else
+(void)utf;
+c = *ptr;
+#endif  /* SUPPORT_UNICODE */
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case CHAR_LF:
+  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+  return TRUE;
+
+  case CHAR_CR:
+  *lenptr = 1;
+  return TRUE;
+
+  default:
+  return FALSE;
+  }
+
+/* NLTYPE_ANY */
+
+else switch(c)
+  {
+  case CHAR_LF:
+  *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
+  return TRUE;
+
+#ifdef EBCDIC
+  case CHAR_NEL:
+#endif
+  case CHAR_VT:
+  case CHAR_FF:
+  case CHAR_CR:
+  *lenptr = 1;
+  return TRUE;
+
+#ifndef EBCDIC
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  case CHAR_NEL:
+  *lenptr = utf? 2 : 1;
+  return TRUE;
+
+  case 0x2028:   /* LS */
+  case 0x2029:   /* PS */
+  *lenptr = 3;
+  return TRUE;
+
+#else /* 16-bit or 32-bit code units */
+  case CHAR_NEL:
+  case 0x2028:   /* LS */
+  case 0x2029:   /* PS */
+  *lenptr = 1;
+  return TRUE;
+#endif
+#endif /* Not EBCDIC */
+
+  default:
+  return FALSE;
+  }
+}
+
+/* End of pcre2_newline.c */
--- a/third_party/pcre/pcre2_ord2utf.c
+++ b/third_party/pcre/pcre2_ord2utf.c
@ -0,0 +1,120 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2016 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This file contains a function that converts a Unicode character code point
+into a UTF string. The behaviour is different for each code unit width. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/* If SUPPORT_UNICODE is not defined, this function will never be called.
+Supply a dummy function because some compilers do not like empty source
+modules. */
+
+#ifndef SUPPORT_UNICODE
+unsigned int
+PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
+{
+(void)(cvalue);
+(void)(buffer);
+return 0;
+}
+#else  /* SUPPORT_UNICODE */
+
+
+/*************************************************
+*          Convert code point to UTF             *
+*************************************************/
+
+/*
+Arguments:
+  cvalue     the character value
+  buffer     pointer to buffer for result
+
+Returns:     number of code units placed in the buffer
+*/
+
+unsigned int
+PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
+{
+/* Convert to UTF-8 */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+int i, j;
+for (i = 0; i < PRIV(utf8_table1_size); i++)
+  if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
+buffer += i;
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
+*buffer = PRIV(utf8_table2)[i] | cvalue;
+return i + 1;
+
+/* Convert to UTF-16 */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+if (cvalue <= 0xffff)
+  {
+  *buffer = (PCRE2_UCHAR)cvalue;
+  return 1;
+  }
+cvalue -= 0x10000;
+*buffer++ = 0xd800 | (cvalue >> 10);
+*buffer = 0xdc00 | (cvalue & 0x3ff);
+return 2;
+
+/* Convert to UTF-32 */
+
+#else
+*buffer = (PCRE2_UCHAR)cvalue;
+return 1;
+#endif
+}
+#endif  /* SUPPORT_UNICODE */
+
+/* End of pcre_ord2utf.c */
--- a/third_party/pcre/pcre2_pattern_info.c
+++ b/third_party/pcre/pcre2_pattern_info.c
@ -0,0 +1,432 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*        Return info about compiled pattern      *
+*************************************************/
+
+/*
+Arguments:
+  code          points to compiled code
+  what          what information is required
+  where         where to put the information; if NULL, return length
+
+Returns:        0 when data returned
+                > 0 when length requested
+                < 0 on error or unset value
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
+{
+const pcre2_real_code *re = (pcre2_real_code *)code;
+
+if (where == NULL)   /* Requests field length */
+  {
+  switch(what)
+    {
+    case PCRE2_INFO_ALLOPTIONS:
+    case PCRE2_INFO_ARGOPTIONS:
+    case PCRE2_INFO_BACKREFMAX:
+    case PCRE2_INFO_BSR:
+    case PCRE2_INFO_CAPTURECOUNT:
+    case PCRE2_INFO_DEPTHLIMIT:
+    case PCRE2_INFO_EXTRAOPTIONS:
+    case PCRE2_INFO_FIRSTCODETYPE:
+    case PCRE2_INFO_FIRSTCODEUNIT:
+    case PCRE2_INFO_HASBACKSLASHC:
+    case PCRE2_INFO_HASCRORLF:
+    case PCRE2_INFO_HEAPLIMIT:
+    case PCRE2_INFO_JCHANGED:
+    case PCRE2_INFO_LASTCODETYPE:
+    case PCRE2_INFO_LASTCODEUNIT:
+    case PCRE2_INFO_MATCHEMPTY:
+    case PCRE2_INFO_MATCHLIMIT:
+    case PCRE2_INFO_MAXLOOKBEHIND:
+    case PCRE2_INFO_MINLENGTH:
+    case PCRE2_INFO_NAMEENTRYSIZE:
+    case PCRE2_INFO_NAMECOUNT:
+    case PCRE2_INFO_NEWLINE:
+    return sizeof(uint32_t);
+
+    case PCRE2_INFO_FIRSTBITMAP:
+    return sizeof(const uint8_t *);
+
+    case PCRE2_INFO_JITSIZE:
+    case PCRE2_INFO_SIZE:
+    case PCRE2_INFO_FRAMESIZE:
+    return sizeof(size_t);
+
+    case PCRE2_INFO_NAMETABLE:
+    return sizeof(PCRE2_SPTR);
+    }
+  }
+
+if (re == NULL) return PCRE2_ERROR_NULL;
+
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE2_ERROR_BADMAGIC. */
+
+if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+
+/* Check that this pattern was compiled in the correct bit mode */
+
+if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
+
+switch(what)
+  {
+  case PCRE2_INFO_ALLOPTIONS:
+  *((uint32_t *)where) = re->overall_options;
+  break;
+
+  case PCRE2_INFO_ARGOPTIONS:
+  *((uint32_t *)where) = re->compile_options;
+  break;
+
+  case PCRE2_INFO_BACKREFMAX:
+  *((uint32_t *)where) = re->top_backref;
+  break;
+
+  case PCRE2_INFO_BSR:
+  *((uint32_t *)where) = re->bsr_convention;
+  break;
+
+  case PCRE2_INFO_CAPTURECOUNT:
+  *((uint32_t *)where) = re->top_bracket;
+  break;
+
+  case PCRE2_INFO_DEPTHLIMIT:
+  *((uint32_t *)where) = re->limit_depth;
+  if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
+  break;
+
+  case PCRE2_INFO_EXTRAOPTIONS:
+  *((uint32_t *)where) = re->extra_options;
+  break;
+
+  case PCRE2_INFO_FIRSTCODETYPE:
+  *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
+                         ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
+  break;
+
+  case PCRE2_INFO_FIRSTCODEUNIT:
+  *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
+    re->first_codeunit : 0;
+  break;
+
+  case PCRE2_INFO_FIRSTBITMAP:
+  *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
+    &(re->start_bitmap[0]) : NULL;
+  break;
+
+  case PCRE2_INFO_FRAMESIZE:
+  *((size_t *)where) = offsetof(heapframe, ovector) +
+    re->top_bracket * 2 * sizeof(PCRE2_SIZE);
+  break;
+
+  case PCRE2_INFO_HASBACKSLASHC:
+  *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
+  break;
+
+  case PCRE2_INFO_HASCRORLF:
+  *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
+  break;
+
+  case PCRE2_INFO_HEAPLIMIT:
+  *((uint32_t *)where) = re->limit_heap;
+  if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
+  break;
+
+  case PCRE2_INFO_JCHANGED:
+  *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
+  break;
+
+  case PCRE2_INFO_JITSIZE:
+#ifdef SUPPORT_JIT
+  *((size_t *)where) = (re->executable_jit != NULL)?
+    PRIV(jit_get_size)(re->executable_jit) : 0;
+#else
+  *((size_t *)where) = 0;
+#endif
+  break;
+
+  case PCRE2_INFO_LASTCODETYPE:
+  *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
+  break;
+
+  case PCRE2_INFO_LASTCODEUNIT:
+  *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
+    re->last_codeunit : 0;
+  break;
+
+  case PCRE2_INFO_MATCHEMPTY:
+  *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
+  break;
+
+  case PCRE2_INFO_MATCHLIMIT:
+  *((uint32_t *)where) = re->limit_match;
+  if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
+  break;
+
+  case PCRE2_INFO_MAXLOOKBEHIND:
+  *((uint32_t *)where) = re->max_lookbehind;
+  break;
+
+  case PCRE2_INFO_MINLENGTH:
+  *((uint32_t *)where) = re->minlength;
+  break;
+
+  case PCRE2_INFO_NAMEENTRYSIZE:
+  *((uint32_t *)where) = re->name_entry_size;
+  break;
+
+  case PCRE2_INFO_NAMECOUNT:
+  *((uint32_t *)where) = re->name_count;
+  break;
+
+  case PCRE2_INFO_NAMETABLE:
+  *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
+  break;
+
+  case PCRE2_INFO_NEWLINE:
+  *((uint32_t *)where) = re->newline_convention;
+  break;
+
+  case PCRE2_INFO_SIZE:
+  *((size_t *)where) = re->blocksize;
+  break;
+
+  default: return PCRE2_ERROR_BADOPTION;
+  }
+
+return 0;
+}
+
+
+
+/*************************************************
+*              Callout enumerator                *
+*************************************************/
+
+/*
+Arguments:
+  code          points to compiled code
+  callback      function called for each callout block
+  callout_data  user data passed to the callback
+
+Returns:        0 when successfully completed
+                < 0 on local error
+               != 0 for callback error
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_callout_enumerate(const pcre2_code *code,
+  int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
+{
+pcre2_real_code *re = (pcre2_real_code *)code;
+pcre2_callout_enumerate_block cb;
+PCRE2_SPTR cc;
+#ifdef SUPPORT_UNICODE
+BOOL utf;
+#endif
+
+if (re == NULL) return PCRE2_ERROR_NULL;
+
+#ifdef SUPPORT_UNICODE
+utf = (re->overall_options & PCRE2_UTF) != 0;
+#endif
+
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE2_ERROR_BADMAGIC. */
+
+if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+
+/* Check that this pattern was compiled in the correct bit mode */
+
+if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
+
+cb.version = 0;
+cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
+     + re->name_count * re->name_entry_size;
+
+while (TRUE)
+  {
+  int rc;
+  switch (*cc)
+    {
+    case OP_END:
+    return 0;
+
+    case OP_CHAR:
+    case OP_CHARI:
+    case OP_NOT:
+    case OP_NOTI:
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_EXACT:
+    case OP_POSSTAR:
+    case OP_POSPLUS:
+    case OP_POSQUERY:
+    case OP_POSUPTO:
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_EXACTI:
+    case OP_POSSTARI:
+    case OP_POSPLUSI:
+    case OP_POSQUERYI:
+    case OP_POSUPTOI:
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTEXACT:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPOSPLUS:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSUPTO:
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTEXACTI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTPOSQUERYI:
+    case OP_NOTPOSUPTOI:
+    cc += PRIV(OP_lengths)[*cc];
+#ifdef SUPPORT_UNICODE
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+    break;
+
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEEXACT:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEPOSQUERY:
+    case OP_TYPEPOSUPTO:
+    cc += PRIV(OP_lengths)[*cc];
+#ifdef SUPPORT_UNICODE
+    if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
+#endif
+    break;
+
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+    case OP_XCLASS:
+    cc += GET(cc, 1);
+    break;
+#endif
+
+    case OP_MARK:
+    case OP_COMMIT_ARG:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    cc += PRIV(OP_lengths)[*cc] + cc[1];
+    break;
+
+    case OP_CALLOUT:
+    cb.pattern_position = GET(cc, 1);
+    cb.next_item_length = GET(cc, 1 + LINK_SIZE);
+    cb.callout_number = cc[1 + 2*LINK_SIZE];
+    cb.callout_string_offset = 0;
+    cb.callout_string_length = 0;
+    cb.callout_string = NULL;
+    rc = callback(&cb, callout_data);
+    if (rc != 0) return rc;
+    cc += PRIV(OP_lengths)[*cc];
+    break;
+
+    case OP_CALLOUT_STR:
+    cb.pattern_position = GET(cc, 1);
+    cb.next_item_length = GET(cc, 1 + LINK_SIZE);
+    cb.callout_number = 0;
+    cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
+    cb.callout_string_length =
+      GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
+    cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
+    rc = callback(&cb, callout_data);
+    if (rc != 0) return rc;
+    cc += GET(cc, 1 + 2*LINK_SIZE);
+    break;
+
+    default:
+    cc += PRIV(OP_lengths)[*cc];
+    break;
+    }
+  }
+}
+
+/* End of pcre2_pattern_info.c */
--- a/third_party/pcre/pcre2_printint.inc
+++ b/third_party/pcre/pcre2_printint.inc
@ -0,0 +1,868 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a PCRE private debugging function for printing out the
+internal form of a compiled regular expression, along with some supporting
+local functions. This source file is #included in pcre2test.c at each supported
+code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
+that comprise the library. It can also optionally be included in
+pcre2_compile.c for detailed debugging in error situations. */
+
+
+/* Tables of operator names. The same 8-bit table is used for all code unit
+widths, so it must be defined only once. The list itself is defined in
+pcre2_internal.h, which is #included by pcre2test before this file. */
+
+#ifndef OP_LISTS_DEFINED
+static const char *OP_names[] = { OP_NAME_LIST };
+#define OP_LISTS_DEFINED
+#endif
+
+/* The functions and tables herein must all have mode-dependent names. */
+
+#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
+#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
+#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
+#define print_char            PCRE2_SUFFIX(print_char_)
+#define print_custring        PCRE2_SUFFIX(print_custring_)
+#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
+#define print_prop            PCRE2_SUFFIX(print_prop_)
+
+/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
+the definition is next to the definition of the opcodes in pcre2_internal.h.
+The contents of the table are, however, mode-dependent. */
+
+static const uint8_t OP_lengths[] = { OP_LENGTHS };
+
+
+
+/*************************************************
+*       Print one character from a string        *
+*************************************************/
+
+/* In UTF mode the character may occupy more than one code unit.
+
+Arguments:
+  f           file to write to
+  ptr         pointer to first code unit of the character
+  utf         TRUE if string is UTF (will be FALSE if UTF is not supported)
+
+Returns:      number of additional code units used
+*/
+
+static unsigned int
+print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
+{
+uint32_t c = *ptr;
+BOOL one_code_unit = !utf;
+
+/* If UTF is supported and requested, check for a valid single code unit. */
+
+#ifdef SUPPORT_UNICODE
+if (utf)
+  {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  one_code_unit = c < 0x80;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+  one_code_unit = (c & 0xfc00) != 0xd800;
+#else
+  one_code_unit = (c & 0xfffff800u) != 0xd800u;
+#endif  /* CODE_UNIT_WIDTH */
+  }
+#endif  /* SUPPORT_UNICODE */
+
+/* Handle a valid one-code-unit character at any width. */
+
+if (one_code_unit)
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
+  else if (c < 0x80) fprintf(f, "\\x%02x", c);
+  else fprintf(f, "\\x{%02x}", c);
+  return 0;
+  }
+
+/* Code for invalid UTF code units and multi-unit UTF characters is different
+for each width. If UTF is not supported, control should never get here, but we
+need a return statement to keep the compiler happy. */
+
+#ifndef SUPPORT_UNICODE
+return 0;
+#else
+
+/* Malformed UTF-8 should occur only if the sanity check has been turned off.
+Rather than swallow random bytes, just stop if we hit a bad one. Print it with
+\X instead of \x as an indication. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+if ((c & 0xc0) != 0xc0)
+  {
+  fprintf(f, "\\X{%x}", c);       /* Invalid starting byte */
+  return 0;
+  }
+else
+  {
+  int i;
+  int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
+  int s = 6*a;
+  c = (c & PRIV(utf8_table3)[a]) << s;
+  for (i = 1; i <= a; i++)
+    {
+    if ((ptr[i] & 0xc0) != 0x80)
+      {
+      fprintf(f, "\\X{%x}", c);   /* Invalid secondary byte */
+      return i - 1;
+      }
+    s -= 6;
+    c |= (ptr[i] & 0x3f) << s;
+    }
+  fprintf(f, "\\x{%x}", c);
+  return a;
+}
+#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
+
+/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
+Print it with \X instead of \x as an indication. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 16
+if ((ptr[1] & 0xfc00) != 0xdc00)
+  {
+  fprintf(f, "\\X{%x}", c);
+  return 0;
+  }
+c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
+fprintf(f, "\\x{%x}", c);
+return 1;
+#endif  /* PCRE2_CODE_UNIT_WIDTH == 16 */
+
+/* For UTF-32 we get here only for a malformed code unit, which should only
+occur if the sanity check has been turned off. Print it with \X instead of \x
+as an indication. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+fprintf(f, "\\X{%x}", c);
+return 0;
+#endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
+#endif  /* SUPPORT_UNICODE */
+}
+
+
+
+/*************************************************
+*     Print string as a list of code units       *
+*************************************************/
+
+/* These take no account of UTF as they always print each individual code unit.
+The string is zero-terminated for print_custring(); the length is given for
+print_custring_bylen().
+
+Arguments:
+  f          file to write to
+  ptr        point to the string
+  len        length for print_custring_bylen()
+
+Returns:     nothing
+*/
+
+static void
+print_custring(FILE *f, PCRE2_SPTR ptr)
+{
+while (*ptr != '\0')
+  {
+  uint32_t c = *ptr++;
+  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
+  }
+}
+
+static void
+print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
+{
+for (; len > 0; len--)
+  {
+  uint32_t c = *ptr++;
+  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
+  }
+}
+
+
+
+/*************************************************
+*          Find Unicode property name            *
+*************************************************/
+
+/* When there is no UTF/UCP support, the table of names does not exist. This
+function should not be called in such configurations, because a pattern that
+tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
+into the main code, however, we just put one into this function.
+
+Now that the table contains both full names and their abbreviations, we do some
+fiddling to try to get the full name, which is either the longer of two found
+names, or a 3-character script name. */
+
+static const char *
+get_ucpname(unsigned int ptype, unsigned int pvalue)
+{
+#ifdef SUPPORT_UNICODE
+int count = 0;
+const char *yield = "??";
+size_t len = 0;
+unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype;
+
+for (int i = PRIV(utt_size) - 1; i >= 0; i--)
+  {
+  const ucp_type_table *u = PRIV(utt) + i;
+
+  if ((ptype == u->type || ptypex == u->type) && pvalue == u->value)
+    {
+    const char *s = PRIV(utt_names) + u->name_offset;
+    size_t sl = strlen(s);
+
+    if (sl == 3 && (u->type == PT_SC || u->type == PT_SCX))
+      {
+      yield = s;
+      break;
+      }
+
+    if (sl > len)
+      {
+      yield = s;
+      len = sl;
+      }
+
+    if (++count >= 2) break;
+    }
+  }
+
+return yield;
+
+#else   /* No UTF support */
+(void)ptype;
+(void)pvalue;
+return "??";
+#endif  /* SUPPORT_UNICODE */
+}
+
+
+
+/*************************************************
+*       Print Unicode property value             *
+*************************************************/
+
+/* "Normal" properties can be printed from tables. The PT_CLIST property is a
+pseudo-property that contains a pointer to a list of case-equivalent
+characters.
+
+Arguments:
+  f            file to write to
+  code         pointer in the compiled code
+  before       text to print before
+  after        text to print after
+
+Returns:       nothing
+*/
+
+static void
+print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
+{
+if (code[1] != PT_CLIST)
+  {
+  const char *sc = (code[1] == PT_SC)? "script:" : "";
+  const char *s = get_ucpname(code[1], code[2]);
+  fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
+  }
+else
+  {
+  const char *not = (*code == OP_PROP)? "" : "not ";
+  const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
+  fprintf (f, "%s%sclist", before, not);
+  while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
+  fprintf(f, "%s", after);
+  }
+}
+
+
+
+/*************************************************
+*            Print compiled pattern              *
+*************************************************/
+
+/* The print_lengths flag controls whether offsets and lengths of items are
+printed. Lenths can be turned off from pcre2test so that automatic tests on
+bytecode can be written that do not depend on the value of LINK_SIZE.
+
+Arguments:
+  re              a compiled pattern
+  f               the file to write to
+  print_lengths   show various lengths
+
+Returns:          nothing
+*/
+
+static void
+pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
+{
+PCRE2_SPTR codestart, nametable, code;
+uint32_t nesize = re->name_entry_size;
+BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
+
+nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
+code = codestart = nametable + re->name_count * re->name_entry_size;
+
+for(;;)
+  {
+  PCRE2_SPTR ccode;
+  uint32_t c;
+  int i;
+  const char *flag = "  ";
+  unsigned int extra = 0;
+
+  if (print_lengths)
+    fprintf(f, "%3d ", (int)(code - codestart));
+  else
+    fprintf(f, "    ");
+
+  switch(*code)
+    {
+/* ========================================================================== */
+      /* These cases are never obeyed. This is a fudge that causes a compile-
+      time error if the vectors OP_names or OP_lengths, which are indexed
+      by opcode, are not the correct length. It seems to be the only way to do
+      such a check at compile time, as the sizeof() operator does not work in
+      the C preprocessor. */
+
+      case OP_TABLE_LENGTH:
+      case OP_TABLE_LENGTH +
+        ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
+        (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
+      return;
+/* ========================================================================== */
+
+    case OP_END:
+    fprintf(f, "    %s\n", OP_names[*code]);
+    fprintf(f, "------------------------------------------------------------------\n");
+    return;
+
+    case OP_CHAR:
+    fprintf(f, "    ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHAR);
+    fprintf(f, "\n");
+    continue;
+
+    case OP_CHARI:
+    fprintf(f, " /i ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHARI);
+    fprintf(f, "\n");
+    continue;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
+    break;
+
+    case OP_BRA:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_KETRMAX:
+    case OP_KETRMIN:
+    case OP_KETRPOS:
+    case OP_ALT:
+    case OP_KET:
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ASSERT_NA:
+    case OP_ASSERTBACK_NA:
+    case OP_ONCE:
+    case OP_SCRIPT_RUN:
+    case OP_COND:
+    case OP_SCOND:
+    case OP_REVERSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_CLOSE:
+    fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
+    break;
+
+    case OP_CREF:
+    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
+    break;
+
+    case OP_DNCREF:
+      {
+      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
+      fprintf(f, " %s Cond ref <", flag);
+      print_custring(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    break;
+
+    case OP_RREF:
+    c = GET2(code, 1);
+    if (c == RREF_ANY)
+      fprintf(f, "    Cond recurse any");
+    else
+      fprintf(f, "    Cond recurse %d", c);
+    break;
+
+    case OP_DNRREF:
+      {
+      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
+      fprintf(f, " %s Cond recurse <", flag);
+      print_custring(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    break;
+
+    case OP_FALSE:
+    fprintf(f, "    Cond false");
+    break;
+
+    case OP_TRUE:
+    fprintf(f, "    Cond true");
+    break;
+
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_POSSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_POSPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_POSQUERYI:
+    flag = "/i";
+    /* Fall through */
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_POSSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_POSQUERY:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSQUERY:
+    fprintf(f, " %s ", flag);
+
+    if (*code >= OP_TYPESTAR)
+      {
+      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
+        {
+        print_prop(f, code + 1, "", " ");
+        extra = 2;
+        }
+      else fprintf(f, "%s", OP_names[code[1]]);
+      }
+    else extra = print_char(f, code+1, utf);
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_EXACTI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_POSUPTOI:
+    flag = "/i";
+    /* Fall through */
+    case OP_EXACT:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_POSUPTO:
+    fprintf(f, " %s ", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+    fprintf(f, "{");
+    if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
+      else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_TYPEEXACT:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
+    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+      {
+      print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
+      extra = 2;
+      }
+    else fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);
+    fprintf(f, "{");
+    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
+      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
+    break;
+
+    case OP_NOTI:
+    flag = "/i";
+    /* Fall through */
+    case OP_NOT:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]");
+    break;
+
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTPOSQUERYI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTPOSPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTPOSQUERY:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]%s", OP_names[*code]);
+    break;
+
+    case OP_NOTEXACTI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTPOSUPTOI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTEXACT:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTPOSUPTO:
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+    fprintf(f, "]{");
+    if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
+      else
+    if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_RECURSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_REFI:
+    flag = "/i";
+    /* Fall through */
+    case OP_REF:
+    fprintf(f, " %s \\%d", flag, GET2(code,1));
+    ccode = code + OP_lengths[*code];
+    goto CLASS_REF_REPEAT;
+
+    case OP_DNREFI:
+    flag = "/i";
+    /* Fall through */
+    case OP_DNREF:
+      {
+      PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
+      fprintf(f, " %s \\k<", flag);
+      print_custring(f, entry);
+      fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+      }
+    ccode = code + OP_lengths[*code];
+    goto CLASS_REF_REPEAT;
+
+    case OP_CALLOUT:
+    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
+      GET(code, 1), GET(code, 1 + LINK_SIZE));
+    break;
+
+    case OP_CALLOUT_STR:
+    c = code[1 + 4*LINK_SIZE];
+    fprintf(f, "    %s %c", OP_names[*code], c);
+    extra = GET(code, 1 + 2*LINK_SIZE);
+    print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
+    for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
+      if (c == PRIV(callout_start_delims)[i])
+        {
+        c = PRIV(callout_end_delims)[i];
+        break;
+        }
+    fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
+      GET(code, 1 + LINK_SIZE));
+    break;
+
+    case OP_PROP:
+    case OP_NOTPROP:
+    print_prop(f, code, "    ", "");
+    break;
+
+    /* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
+    in having this code always here, and it makes it less messy without all
+    those #ifdefs. */
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    case OP_XCLASS:
+      {
+      unsigned int min, max;
+      BOOL printmap;
+      BOOL invertmap = FALSE;
+      uint8_t *map;
+      uint8_t inverted_map[32];
+
+      fprintf(f, "    [");
+
+      if (*code == OP_XCLASS)
+        {
+        extra = GET(code, 1);
+        ccode = code + LINK_SIZE + 1;
+        printmap = (*ccode & XCL_MAP) != 0;
+        if ((*ccode & XCL_NOT) != 0)
+          {
+          invertmap = (*ccode & XCL_HASPROP) == 0;
+          fprintf(f, "^");
+          }
+        ccode++;
+        }
+      else
+        {
+        printmap = TRUE;
+        ccode = code + 1;
+        }
+
+      /* Print a bit map */
+
+      if (printmap)
+        {
+        map = (uint8_t *)ccode;
+        if (invertmap)
+          {
+          /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
+          for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i];
+          map = inverted_map;
+          }
+
+        for (i = 0; i < 256; i++)
+          {
+          if ((map[i/8] & (1u << (i&7))) != 0)
+            {
+            int j;
+            for (j = i+1; j < 256; j++)
+              if ((map[j/8] & (1u << (j&7))) == 0) break;
+            if (i == '-' || i == ']') fprintf(f, "\\");
+            if (PRINTABLE(i)) fprintf(f, "%c", i);
+              else fprintf(f, "\\x%02x", i);
+            if (--j > i)
+              {
+              if (j != i + 1) fprintf(f, "-");
+              if (j == '-' || j == ']') fprintf(f, "\\");
+              if (PRINTABLE(j)) fprintf(f, "%c", j);
+                else fprintf(f, "\\x%02x", j);
+              }
+            i = j;
+            }
+          }
+        ccode += 32 / sizeof(PCRE2_UCHAR);
+        }
+
+      /* For an XCLASS there is always some additional data */
+
+      if (*code == OP_XCLASS)
+        {
+        PCRE2_UCHAR ch;
+        while ((ch = *ccode++) != XCL_END)
+          {
+          BOOL not = FALSE;
+          const char *notch = "";
+
+          switch(ch)
+            {
+            case XCL_NOTPROP:
+            not = TRUE;
+            notch = "^";
+            /* Fall through */
+
+            case XCL_PROP:
+              {
+              unsigned int ptype = *ccode++;
+              unsigned int pvalue = *ccode++;
+              const char *s;
+
+              switch(ptype)
+                {
+                case PT_PXGRAPH:
+                fprintf(f, "[:%sgraph:]", notch);
+                break;
+
+                case PT_PXPRINT:
+                fprintf(f, "[:%sprint:]", notch);
+                break;
+
+                case PT_PXPUNCT:
+                fprintf(f, "[:%spunct:]", notch);
+                break;
+
+                default:
+                s = get_ucpname(ptype, pvalue);
+                fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
+                break;
+                }
+              }
+            break;
+
+            default:
+            ccode += 1 + print_char(f, ccode, utf);
+            if (ch == XCL_RANGE)
+              {
+              fprintf(f, "-");
+              ccode += 1 + print_char(f, ccode, utf);
+              }
+            break;
+            }
+          }
+        }
+
+      /* Indicate a non-UTF class which was created by negation */
+
+      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
+
+      /* Handle repeats after a class or a back reference */
+
+      CLASS_REF_REPEAT:
+      switch(*ccode)
+        {
+        case OP_CRSTAR:
+        case OP_CRMINSTAR:
+        case OP_CRPLUS:
+        case OP_CRMINPLUS:
+        case OP_CRQUERY:
+        case OP_CRMINQUERY:
+        case OP_CRPOSSTAR:
+        case OP_CRPOSPLUS:
+        case OP_CRPOSQUERY:
+        fprintf(f, "%s", OP_names[*ccode]);
+        extra += OP_lengths[*ccode];
+        break;
+
+        case OP_CRRANGE:
+        case OP_CRMINRANGE:
+        case OP_CRPOSRANGE:
+        min = GET2(ccode,1);
+        max = GET2(ccode,1 + IMM2_SIZE);
+        if (max == 0) fprintf(f, "{%u,}", min);
+        else fprintf(f, "{%u,%u}", min, max);
+        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
+        else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
+        extra += OP_lengths[*ccode];
+        break;
+
+        /* Do nothing if it's not a repeat; this code stops picky compilers
+        warning about the lack of a default code path. */
+
+        default:
+        break;
+        }
+      }
+    break;
+
+    case OP_MARK:
+    case OP_COMMIT_ARG:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    fprintf(f, "    %s ", OP_names[*code]);
+    print_custring_bylen(f, code + 2, code[1]);
+    extra += code[1];
+    break;
+
+    case OP_THEN:
+    fprintf(f, "    %s", OP_names[*code]);
+    break;
+
+    case OP_CIRCM:
+    case OP_DOLLM:
+    flag = "/m";
+    /* Fall through */
+
+    /* Anything else is just an item with no data, but possibly a flag. */
+
+    default:
+    fprintf(f, " %s %s", flag, OP_names[*code]);
+    break;
+    }
+
+  code += OP_lengths[*code] + extra;
+  fprintf(f, "\n");
+  }
+}
+
+/* End of pcre2_printint.c */
--- a/third_party/pcre/pcre2_script_run.c
+++ b/third_party/pcre/pcre2_script_run.c
@ -0,0 +1,344 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains the function for checking a script run. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*                Check script run                *
+*************************************************/
+
+/* A script run is conceptually a sequence of characters all in the same
+Unicode script. However, it isn't quite that simple. There are special rules
+for scripts that are commonly used together, and also special rules for digits.
+This function implements the appropriate checks, which is possible only when
+PCRE2 is compiled with Unicode support. The function returns TRUE if there is
+no Unicode support; however, it should never be called in that circumstance
+because an error is given by pcre2_compile() if a script run is called for in a
+version of PCRE2 compiled without Unicode support.
+
+Arguments:
+  pgr       point to the first character
+  endptr    point after the last character
+  utf       TRUE if in UTF mode
+
+Returns:    TRUE if this is a valid script run
+*/
+
+/* These are states in the checking process. */
+
+enum { SCRIPT_UNSET,          /* Requirement as yet unknown */
+       SCRIPT_MAP,            /* Bitmap contains acceptable scripts */
+       SCRIPT_HANPENDING,     /* Have had only Han characters */
+       SCRIPT_HANHIRAKATA,    /* Expect Han or Hirikata */
+       SCRIPT_HANBOPOMOFO,    /* Expect Han or Bopomofo */
+       SCRIPT_HANHANGUL       /* Expect Han or Hangul */
+       };
+
+#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
+#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
+
+BOOL
+PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
+{
+#ifdef SUPPORT_UNICODE
+uint32_t require_state = SCRIPT_UNSET;
+uint32_t require_map[FULL_MAPSIZE];
+uint32_t map[FULL_MAPSIZE];
+uint32_t require_digitset = 0;
+uint32_t c;
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+(void)utf;    /* Avoid compiler warning */
+#endif
+
+/* Any string containing fewer than 2 characters is a valid script run. */
+
+if (ptr >= endptr) return TRUE;
+GETCHARINCTEST(c, ptr);
+if (ptr >= endptr) return TRUE;
+
+/* Initialize the require map. This is a full-size bitmap that has a bit for
+every script, as opposed to the maps in ucd_script_sets, which only have bits
+for scripts less than ucp_Unknown - those that appear in script extension
+lists. */
+
+for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
+
+/* Scan strings of two or more characters, checking the Unicode characteristics
+of each code point. There is special code for scripts that can be combined with
+characters from the Han Chinese script. This may be used in conjunction with
+four other scripts in these combinations:
+
+. Han with Hiragana and Katakana is allowed (for Japanese).
+. Han with Bopomofo is allowed (for Taiwanese Mandarin).
+. Han with Hangul is allowed (for Korean).
+
+If the first significant character's script is one of the four, the required
+script type is immediately known. However, if the first significant
+character's script is Han, we have to keep checking for a non-Han character.
+Hence the SCRIPT_HANPENDING state. */
+
+for (;;)
+  {
+  const ucd_record *ucd = GET_UCD(c);
+  uint32_t script = ucd->script;
+
+  /* If the script is Unknown, the string is not a valid script run. Such
+  characters can only form script runs of length one (see test above). */
+
+  if (script == ucp_Unknown) return FALSE;
+
+  /* A character without any script extensions whose script is Inherited or
+  Common is always accepted with any script. If there are extensions, the
+  following processing happens for all scripts. */
+
+  if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
+    {
+    BOOL OK;
+
+    /* Set up a full-sized map for this character that can include bits for all
+    scripts. Copy the scriptx map for this character (which covers those
+    scripts that appear in script extension lists), set the remaining values to
+    zero, and then, except for Common or Inherited, add this script's bit to
+    the map. */
+
+    memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
+    memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
+    if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
+
+    /* Handle the different checking states */
+
+    switch(require_state)
+      {
+      /* First significant character - it might follow Common or Inherited
+      characters that do not have any script extensions. */
+
+      case SCRIPT_UNSET:
+      switch(script)
+        {
+        case ucp_Han:
+        require_state = SCRIPT_HANPENDING;
+        break;
+
+        case ucp_Hiragana:
+        case ucp_Katakana:
+        require_state = SCRIPT_HANHIRAKATA;
+        break;
+
+        case ucp_Bopomofo:
+        require_state = SCRIPT_HANBOPOMOFO;
+        break;
+
+        case ucp_Hangul:
+        require_state = SCRIPT_HANHANGUL;
+        break;
+
+        default:
+        memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
+        require_state = SCRIPT_MAP;
+        break;
+        }
+      break;
+
+      /* The first significant character was Han. An inspection of the Unicode
+      11.0.0 files shows that there are the following types of Script Extension
+      list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
+      scripts:
+
+      . Bopomofo + Han
+      . Han + Hiragana + Katakana
+      . Hiragana + Katakana
+      . Bopopmofo + Hangul + Han + Hiragana + Katakana
+
+      The following code tries to make sense of this. */
+
+#define FOUND_BOPOMOFO 1
+#define FOUND_HIRAGANA 2
+#define FOUND_KATAKANA 4
+#define FOUND_HANGUL   8
+
+      case SCRIPT_HANPENDING:
+      if (script != ucp_Han)   /* Another Han does nothing */
+        {
+        uint32_t chspecial = 0;
+
+        if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
+        if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
+        if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
+        if (MAPBIT(map, ucp_Hangul) != 0)   chspecial |= FOUND_HANGUL;
+
+        if (chspecial == 0) return FALSE;   /* Not allowed with Han */
+
+        if (chspecial == FOUND_BOPOMOFO)
+          require_state = SCRIPT_HANBOPOMOFO;
+        else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
+          require_state = SCRIPT_HANHIRAKATA;
+
+        /* Otherwise this character must be allowed with all of them, so remain
+        in the pending state. */
+        }
+      break;
+
+      /* Previously encountered one of the "with Han" scripts. Check that
+      this character is appropriate. */
+
+      case SCRIPT_HANHIRAKATA:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
+          MAPBIT(map, ucp_Katakana) == 0) return FALSE;
+      break;
+
+      case SCRIPT_HANBOPOMOFO:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
+      break;
+
+      case SCRIPT_HANHANGUL:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
+      break;
+
+      /* Previously encountered one or more characters that are allowed with a
+      list of scripts. */
+
+      case SCRIPT_MAP:
+      OK = FALSE;
+
+      for (int i = 0; i < FULL_MAPSIZE; i++)
+        {
+        if ((require_map[i] & map[i]) != 0)
+          {
+          OK = TRUE;
+          break;
+          }
+        }
+
+      if (!OK) return FALSE;
+
+      /* The rest of the string must be in this script, but we have to
+      allow for the Han complications. */
+
+      switch(script)
+        {
+        case ucp_Han:
+        require_state = SCRIPT_HANPENDING;
+        break;
+
+        case ucp_Hiragana:
+        case ucp_Katakana:
+        require_state = SCRIPT_HANHIRAKATA;
+        break;
+
+        case ucp_Bopomofo:
+        require_state = SCRIPT_HANBOPOMOFO;
+        break;
+
+        case ucp_Hangul:
+        require_state = SCRIPT_HANHANGUL;
+        break;
+
+        /* Compute the intersection of the required list of scripts and the
+        allowed scripts for this character. */
+
+        default:
+        for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
+        break;
+        }
+
+      break;
+      }
+    }   /* End checking character's script and extensions. */
+
+  /* The character is in an acceptable script. We must now ensure that all
+  decimal digits in the string come from the same set. Some scripts (e.g.
+  Common, Arabic) have more than one set of decimal digits. This code does
+  not allow mixing sets, even within the same script. The vector called
+  PRIV(ucd_digit_sets)[] contains, in its first element, the number of
+  following elements, and then, in ascending order, the code points of the
+  '9' characters in every set of 10 digits. Each set is identified by the
+  offset in the vector of its '9' character. An initial check of the first
+  value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
+
+  if (ucd->chartype == ucp_Nd)
+    {
+    uint32_t digitset;
+
+    if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
+      {
+      int mid;
+      int bot = 1;
+      int top = PRIV(ucd_digit_sets)[0];
+      for (;;)
+        {
+        if (top <= bot + 1)    /* <= rather than == is paranoia */
+          {
+          digitset = top;
+          break;
+          }
+        mid = (top + bot) / 2;
+        if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
+        }
+      }
+
+    /* A required value of 0 means "unset". */
+
+    if (require_digitset == 0) require_digitset = digitset;
+      else if (digitset != require_digitset) return FALSE;
+    }   /* End digit handling */
+
+  /* If we haven't yet got to the end, pick up the next character. */
+
+  if (ptr >= endptr) return TRUE;
+  GETCHARINCTEST(c, ptr);
+  }  /* End checking loop */
+
+#else   /* NOT SUPPORT_UNICODE */
+(void)ptr;
+(void)endptr;
+(void)utf;
+return TRUE;
+#endif  /* SUPPORT_UNICODE */
+}
+
+/* End of pcre2_script_run.c */
--- a/third_party/pcre/pcre2_serialize.c
+++ b/third_party/pcre/pcre2_serialize.c
@ -0,0 +1,286 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains functions for serializing and deserializing
+a sequence of compiled codes. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+/* Magic number to provide a small check against being handed junk. */
+
+#define SERIALIZED_DATA_MAGIC 0x50523253u
+
+/* Deserialization is limited to the current PCRE version and
+character width. */
+
+#define SERIALIZED_DATA_VERSION \
+  ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
+
+#define SERIALIZED_DATA_CONFIG \
+  (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
+
+
+
+/*************************************************
+*           Serialize compiled patterns          *
+*************************************************/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
+   uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
+   pcre2_general_context *gcontext)
+{
+uint8_t *bytes;
+uint8_t *dst_bytes;
+int32_t i;
+PCRE2_SIZE total_size;
+const pcre2_real_code *re;
+const uint8_t *tables;
+pcre2_serialized_data *data;
+
+const pcre2_memctl *memctl = (gcontext != NULL) ?
+  &gcontext->memctl : &PRIV(default_compile_context).memctl;
+
+if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
+  return PCRE2_ERROR_NULL;
+
+if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
+
+/* Compute total size. */
+total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
+tables = NULL;
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  if (codes[i] == NULL) return PCRE2_ERROR_NULL;
+  re = (const pcre2_real_code *)(codes[i]);
+  if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
+  if (tables == NULL)
+    tables = re->tables;
+  else if (tables != re->tables)
+    return PCRE2_ERROR_MIXEDTABLES;
+  total_size += re->blocksize;
+  }
+
+/* Initialize the byte stream. */
+bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
+if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
+
+/* The controller is stored as a hidden parameter. */
+memcpy(bytes, memctl, sizeof(pcre2_memctl));
+bytes += sizeof(pcre2_memctl);
+
+data = (pcre2_serialized_data *)bytes;
+data->magic = SERIALIZED_DATA_MAGIC;
+data->version = SERIALIZED_DATA_VERSION;
+data->config = SERIALIZED_DATA_CONFIG;
+data->number_of_codes = number_of_codes;
+
+/* Copy all compiled code data. */
+dst_bytes = bytes + sizeof(pcre2_serialized_data);
+memcpy(dst_bytes, tables, TABLES_LENGTH);
+dst_bytes += TABLES_LENGTH;
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  re = (const pcre2_real_code *)(codes[i]);
+  (void)memcpy(dst_bytes, (char *)re, re->blocksize);
+  
+  /* Certain fields in the compiled code block are re-set during 
+  deserialization. In order to ensure that the serialized data stream is always 
+  the same for the same pattern, set them to zero here. We can't assume the 
+  copy of the pattern is correctly aligned for accessing the fields as part of 
+  a structure. Note the use of sizeof(void *) in the second of these, to
+  specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a 
+  pointer to uint8_t), gcc gives a warning because the first argument is also a 
+  pointer to uint8_t. Casting the first argument to (void *) can stop this, but 
+  it didn't stop Coverity giving the same complaint. */
+  
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, 
+    sizeof(pcre2_memctl));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, 
+    sizeof(void *));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
+    sizeof(void *));        
+ 
+  dst_bytes += re->blocksize;
+  }
+
+*serialized_bytes = bytes;
+*serialized_size = total_size;
+return number_of_codes;
+}
+
+
+/*************************************************
+*          Deserialize compiled patterns         *
+*************************************************/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
+   const uint8_t *bytes, pcre2_general_context *gcontext)
+{
+const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
+const pcre2_memctl *memctl = (gcontext != NULL) ?
+  &gcontext->memctl : &PRIV(default_compile_context).memctl;
+
+const uint8_t *src_bytes;
+pcre2_real_code *dst_re;
+uint8_t *tables;
+int32_t i, j;
+
+/* Sanity checks. */
+
+if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
+if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
+if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
+if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
+if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
+if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
+
+if (number_of_codes > data->number_of_codes)
+  number_of_codes = data->number_of_codes;
+
+src_bytes = bytes + sizeof(pcre2_serialized_data);
+
+/* Decode tables. The reference count for the tables is stored immediately
+following them. */
+
+tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
+if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
+
+memcpy(tables, src_bytes, TABLES_LENGTH);
+*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
+src_bytes += TABLES_LENGTH;
+
+/* Decode the byte stream. We must not try to read the size from the compiled
+code block in the stream, because it might be unaligned, which causes errors on
+hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
+of the blocksize field is given its own name to ensure that it is the same here
+as in the block. */
+
+for (i = 0; i < number_of_codes; i++)
+  {
+  CODE_BLOCKSIZE_TYPE blocksize;
+  memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
+    sizeof(CODE_BLOCKSIZE_TYPE));
+  if (blocksize <= sizeof(pcre2_real_code))
+    return PCRE2_ERROR_BADSERIALIZEDDATA;
+
+  /* The allocator provided by gcontext replaces the original one. */
+
+  dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
+    (pcre2_memctl *)gcontext);
+  if (dst_re == NULL)
+    {
+    memctl->free(tables, memctl->memory_data);
+    for (j = 0; j < i; j++)
+      {
+      memctl->free(codes[j], memctl->memory_data);
+      codes[j] = NULL;
+      }
+    return PCRE2_ERROR_NOMEMORY;
+    }
+
+  /* The new allocator must be preserved. */
+
+  memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
+    src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
+  if (dst_re->magic_number != MAGIC_NUMBER ||
+      dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
+      dst_re->name_count > MAX_NAME_COUNT)
+    {   
+    memctl->free(dst_re, memctl->memory_data); 
+    return PCRE2_ERROR_BADSERIALIZEDDATA;
+    } 
+
+  /* At the moment only one table is supported. */
+
+  dst_re->tables = tables;
+  dst_re->executable_jit = NULL;
+  dst_re->flags |= PCRE2_DEREF_TABLES;
+
+  codes[i] = dst_re;
+  src_bytes += blocksize;
+  }
+
+return number_of_codes;
+}
+
+
+/*************************************************
+*    Get the number of serialized patterns       *
+*************************************************/
+
+PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
+pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
+{
+const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
+
+if (data == NULL) return PCRE2_ERROR_NULL;
+if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
+if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
+if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
+
+return data->number_of_codes;
+}
+
+
+/*************************************************
+*            Free the allocated stream           *
+*************************************************/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_serialize_free(uint8_t *bytes)
+{
+if (bytes != NULL)
+  {
+  pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
+  memctl->free(memctl, memctl->memory_data);
+  }
+}
+
+/* End of pcre2_serialize.c */
--- a/third_party/pcre/pcre2_string_utils.c
+++ b/third_party/pcre/pcre2_string_utils.c
@ -0,0 +1,237 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2018-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains internal functions for comparing and finding the length
+of strings. These are used instead of strcmp() etc because the standard
+functions work only on 8-bit data. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*    Emulated memmove() for systems without it   *
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+steam, as there some non-Unix environments that lack both memmove() and
+bcopy(). */
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+void *
+PRIV(memmove)(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+  {
+  dest += n;
+  src += n;
+  for (i = 0; i < n; ++i) *(--dest) = *(--src);
+  return (void *)dest;
+  }
+else
+  {
+  for (i = 0; i < n; ++i) *dest++ = *src++;
+  return (void *)(dest - n);
+  }
+#endif   /* not HAVE_BCOPY */
+}
+#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
+/*************************************************
+*    Compare two zero-terminated PCRE2 strings   *
+*************************************************/
+
+/*
+Arguments:
+  str1        first string
+  str2        second string
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
+{
+PCRE2_UCHAR c1, c2;
+while (*str1 != '\0' || *str2 != '\0')
+  {
+  c1 = *str1++;
+  c2 = *str2++;
+  if (c1 != c2) return ((c1 > c2) << 1) - 1;
+  }
+return 0;
+}
+
+
+/*************************************************
+*  Compare zero-terminated PCRE2 & 8-bit strings *
+*************************************************/
+
+/* As the 8-bit string is almost always a literal, its type is specified as
+const char *.
+
+Arguments:
+  str1        first string
+  str2        second string
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
+{
+PCRE2_UCHAR c1, c2;
+while (*str1 != '\0' || *str2 != '\0')
+  {
+  c1 = *str1++;
+  c2 = *str2++;
+  if (c1 != c2) return ((c1 > c2) << 1) - 1;
+  }
+return 0;
+}
+
+
+/*************************************************
+*    Compare two PCRE2 strings, given a length   *
+*************************************************/
+
+/*
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
+{
+PCRE2_UCHAR c1, c2;
+for (; len > 0; len--)
+  {
+  c1 = *str1++;
+  c2 = *str2++;
+  if (c1 != c2) return ((c1 > c2) << 1) - 1;
+  }
+return 0;
+}
+
+
+/*************************************************
+* Compare PCRE2 string to 8-bit string by length *
+*************************************************/
+
+/* As the 8-bit string is almost always a literal, its type is specified as
+const char *.
+
+Arguments:
+  str1        first string
+  str2        second string
+  len         the length
+
+Returns:      0, 1, or -1
+*/
+
+int
+PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
+{
+PCRE2_UCHAR c1, c2;
+for (; len > 0; len--)
+  {
+  c1 = *str1++;
+  c2 = *str2++;
+  if (c1 != c2) return ((c1 > c2) << 1) - 1;
+  }
+return 0;
+}
+
+
+/*************************************************
+*        Find the length of a PCRE2 string       *
+*************************************************/
+
+/*
+Argument:    the string
+Returns:     the length
+*/
+
+PCRE2_SIZE
+PRIV(strlen)(PCRE2_SPTR str)
+{
+PCRE2_SIZE c = 0;
+while (*str++ != 0) c++;
+return c;
+}
+
+
+/*************************************************
+* Copy 8-bit 0-terminated string to PCRE2 string *
+*************************************************/
+
+/* Arguments:
+  str1     buffer to receive the string
+  str2     8-bit string to be copied
+
+Returns:   the number of code units used (excluding trailing zero)
+*/
+
+PCRE2_SIZE
+PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
+{
+PCRE2_UCHAR *t = str1;
+while (*str2 != 0) *t++ = *str2++;
+*t = 0;
+return t - str1;
+}
+
+/* End of pcre2_string_utils.c */
--- a/third_party/pcre/pcre2_study.c
+++ b/third_party/pcre/pcre2_study.c
--- a/third_party/pcre/pcre2_substitute.c
+++ b/third_party/pcre/pcre2_substitute.c
--- a/third_party/pcre/pcre2_substring.c
+++ b/third_party/pcre/pcre2_substring.c
@ -0,0 +1,547 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+
+/*************************************************
+*   Copy named captured string to given buffer   *
+*************************************************/
+
+/* This function copies a single captured substring into a given buffer,
+identifying it by name. If the regex permits duplicate names, the first
+substring that is set is chosen.
+
+Arguments:
+  match_data     points to the match data
+  stringname     the name of the required substring
+  buffer         where to put the substring
+  sizeptr        the size of the buffer, updated to the size of the substring
+
+Returns:         if successful: zero
+                 if not successful, a negative error code:
+                   (1) an error from nametable_scan()
+                   (2) an error from copy_bynumber()
+                   (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
+                   (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
+  PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SPTR first, last, entry;
+int failrc, entrysize;
+if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  return PCRE2_ERROR_DFA_UFUNC;
+entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
+  &first, &last);
+if (entrysize < 0) return entrysize;
+failrc = PCRE2_ERROR_UNAVAILABLE;
+for (entry = first; entry <= last; entry += entrysize)
+  {
+  uint32_t n = GET2(entry, 0);
+  if (n < match_data->oveccount)
+    {
+    if (match_data->ovector[n*2] != PCRE2_UNSET)
+      return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
+    failrc = PCRE2_ERROR_UNSET;
+    }
+  }
+return failrc;
+}
+
+
+
+/*************************************************
+*  Copy numbered captured string to given buffer *
+*************************************************/
+
+/* This function copies a single captured substring into a given buffer,
+identifying it by number.
+
+Arguments:
+  match_data     points to the match data
+  stringnumber   the number of the required substring
+  buffer         where to put the substring
+  sizeptr        the size of the buffer, updated to the size of the substring
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOMEMORY: buffer too small
+                   PCRE2_ERROR_NOSUBSTRING: no such substring
+                   PCRE2_ERROR_UNAVAILABLE: ovector too small
+                   PCRE2_ERROR_UNSET: substring is not set
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
+  uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
+{
+int rc;
+PCRE2_SIZE size;
+rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
+if (rc < 0) return rc;
+if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
+memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
+  CU2BYTES(size));
+buffer[size] = 0;
+*sizeptr = size;
+return 0;
+}
+
+
+
+/*************************************************
+*          Extract named captured string         *
+*************************************************/
+
+/* This function copies a single captured substring, identified by name, into
+new memory. If the regex permits duplicate names, the first substring that is
+set is chosen.
+
+Arguments:
+  match_data     pointer to match_data
+  stringname     the name of the required substring
+  stringptr      where to put the pointer to the new memory
+  sizeptr        where to put the length of the substring
+
+Returns:         if successful: zero
+                 if not successful, a negative value:
+                   (1) an error from nametable_scan()
+                   (2) an error from get_bynumber()
+                   (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
+                   (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_get_byname(pcre2_match_data *match_data,
+  PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SPTR first, last, entry;
+int failrc, entrysize;
+if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  return PCRE2_ERROR_DFA_UFUNC;
+entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
+  &first, &last);
+if (entrysize < 0) return entrysize;
+failrc = PCRE2_ERROR_UNAVAILABLE;
+for (entry = first; entry <= last; entry += entrysize)
+  {
+  uint32_t n = GET2(entry, 0);
+  if (n < match_data->oveccount)
+    {
+    if (match_data->ovector[n*2] != PCRE2_UNSET)
+      return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
+    failrc = PCRE2_ERROR_UNSET;
+    }
+  }
+return failrc;
+}
+
+
+
+/*************************************************
+*      Extract captured string to new memory     *
+*************************************************/
+
+/* This function copies a single captured substring into a piece of new
+memory.
+
+Arguments:
+  match_data     points to match data
+  stringnumber   the number of the required substring
+  stringptr      where to put a pointer to the new memory
+  sizeptr        where to put the size of the substring
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOMEMORY: failed to get memory
+                   PCRE2_ERROR_NOSUBSTRING: no such substring
+                   PCRE2_ERROR_UNAVAILABLE: ovector too small
+                   PCRE2_ERROR_UNSET: substring is not set
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_get_bynumber(pcre2_match_data *match_data,
+  uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
+{
+int rc;
+PCRE2_SIZE size;
+PCRE2_UCHAR *yield;
+rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
+if (rc < 0) return rc;
+yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
+  (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
+if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
+yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
+memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
+  CU2BYTES(size));
+yield[size] = 0;
+*stringptr = yield;
+*sizeptr = size;
+return 0;
+}
+
+
+
+/*************************************************
+*       Free memory obtained by get_substring    *
+*************************************************/
+
+/*
+Argument:     the result of a previous pcre2_substring_get_byxxx()
+Returns:      nothing
+*/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_substring_free(PCRE2_UCHAR *string)
+{
+if (string != NULL)
+  {
+  pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
+  memctl->free(memctl, memctl->memory_data);
+  }
+}
+
+
+
+/*************************************************
+*         Get length of a named substring        *
+*************************************************/
+
+/* This function returns the length of a named captured substring. If the regex
+permits duplicate names, the first substring that is set is chosen.
+
+Arguments:
+  match_data      pointer to match data
+  stringname      the name of the required substring
+  sizeptr         where to put the length
+
+Returns:          0 if successful, else a negative error number
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_length_byname(pcre2_match_data *match_data,
+  PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SPTR first, last, entry;
+int failrc, entrysize;
+if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  return PCRE2_ERROR_DFA_UFUNC;
+entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
+  &first, &last);
+if (entrysize < 0) return entrysize;
+failrc = PCRE2_ERROR_UNAVAILABLE;
+for (entry = first; entry <= last; entry += entrysize)
+  {
+  uint32_t n = GET2(entry, 0);
+  if (n < match_data->oveccount)
+    {
+    if (match_data->ovector[n*2] != PCRE2_UNSET)
+      return pcre2_substring_length_bynumber(match_data, n, sizeptr);
+    failrc = PCRE2_ERROR_UNSET;
+    }
+  }
+return failrc;
+}
+
+
+
+/*************************************************
+*        Get length of a numbered substring      *
+*************************************************/
+
+/* This function returns the length of a captured substring. If the start is
+beyond the end (which can happen when \K is used in an assertion), it sets the
+length to zero.
+
+Arguments:
+  match_data      pointer to match data
+  stringnumber    the number of the required substring
+  sizeptr         where to put the length, if not NULL
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOSUBSTRING: no such substring
+                   PCRE2_ERROR_UNAVAILABLE: ovector is too small
+                   PCRE2_ERROR_UNSET: substring is not set
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_length_bynumber(pcre2_match_data *match_data,
+  uint32_t stringnumber, PCRE2_SIZE *sizeptr)
+{
+PCRE2_SIZE left, right;
+int count = match_data->rc;
+if (count == PCRE2_ERROR_PARTIAL)
+  {
+  if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
+  count = 0;
+  }
+else if (count < 0) return count;            /* Match failed */
+
+if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
+  {
+  if (stringnumber > match_data->code->top_bracket)
+    return PCRE2_ERROR_NOSUBSTRING;
+  if (stringnumber >= match_data->oveccount)
+    return PCRE2_ERROR_UNAVAILABLE;
+  if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
+    return PCRE2_ERROR_UNSET;
+  }
+else  /* Matched using pcre2_dfa_match() */
+  {
+  if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
+  if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
+  }
+
+left = match_data->ovector[stringnumber*2];
+right = match_data->ovector[stringnumber*2+1];
+if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
+return 0;
+}
+
+
+
+/*************************************************
+*    Extract all captured strings to new memory  *
+*************************************************/
+
+/* This function gets one chunk of memory and builds a list of pointers and all
+the captured substrings in it. A NULL pointer is put on the end of the list.
+The substrings are zero-terminated, but also, if the final argument is
+non-NULL, a list of lengths is also returned. This allows binary data to be
+handled.
+
+Arguments:
+  match_data     points to the match data
+  listptr        set to point to the list of pointers
+  lengthsptr     set to point to the list of lengths (may be NULL)
+
+Returns:         if successful: 0
+                 if not successful, a negative error code:
+                   PCRE2_ERROR_NOMEMORY: failed to get memory,
+                   or a match failure code
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
+  PCRE2_SIZE **lengthsptr)
+{
+int i, count, count2;
+PCRE2_SIZE size;
+PCRE2_SIZE *lensp;
+pcre2_memctl *memp;
+PCRE2_UCHAR **listp;
+PCRE2_UCHAR *sp;
+PCRE2_SIZE *ovector;
+
+if ((count = match_data->rc) < 0) return count;   /* Match failed */
+if (count == 0) count = match_data->oveccount;    /* Ovector too small */
+
+count2 = 2*count;
+ovector = match_data->ovector;
+size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
+if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */
+
+for (i = 0; i < count2; i += 2)
+  {
+  size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
+  if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
+  }
+
+memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
+if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
+
+*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
+lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
+
+if (lengthsptr == NULL)
+  {
+  sp = (PCRE2_UCHAR *)lensp;
+  lensp = NULL;
+  }
+else
+  {
+  *lengthsptr = lensp;
+  sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
+  }
+
+for (i = 0; i < count2; i += 2)
+  {
+  size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
+
+  /* Size == 0 includes the case when the capture is unset. Avoid adding
+  PCRE2_UNSET to match_data->subject because it overflows, even though with
+  zero size calling memcpy() is harmless. */
+
+  if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
+  *listp++ = sp;
+  if (lensp != NULL) *lensp++ = size;
+  sp += size;
+  *sp++ = 0;
+  }
+
+*listp = NULL;
+return 0;
+}
+
+
+
+/*************************************************
+*   Free memory obtained by substring_list_get   *
+*************************************************/
+
+/*
+Argument:     the result of a previous pcre2_substring_list_get()
+Returns:      nothing
+*/
+
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_substring_list_free(PCRE2_SPTR *list)
+{
+if (list != NULL)
+  {
+  pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
+  memctl->free(memctl, memctl->memory_data);
+  }
+}
+
+
+
+/*************************************************
+*     Find (multiple) entries for named string   *
+*************************************************/
+
+/* This function scans the nametable for a given name, using binary chop. It
+returns either two pointers to the entries in the table, or, if no pointers are
+given, the number of a unique group with the given name. If duplicate names are
+permitted, and the name is not unique, an error is generated.
+
+Arguments:
+  code        the compiled regex
+  stringname  the name whose entries required
+  firstptr    where to put the pointer to the first entry
+  lastptr     where to put the pointer to the last entry
+
+Returns:      PCRE2_ERROR_NOSUBSTRING if the name is not found
+              otherwise, if firstptr and lastptr are NULL:
+                a group number for a unique substring
+                else PCRE2_ERROR_NOUNIQUESUBSTRING
+              otherwise:
+                the length of each entry, having set firstptr and lastptr
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
+  PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
+{
+uint16_t bot = 0;
+uint16_t top = code->name_count;
+uint16_t entrysize = code->name_entry_size;
+PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
+
+while (top > bot)
+  {
+  uint16_t mid = (top + bot) / 2;
+  PCRE2_SPTR entry = nametable + entrysize*mid;
+  int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
+  if (c == 0)
+    {
+    PCRE2_SPTR first;
+    PCRE2_SPTR last;
+    PCRE2_SPTR lastentry;
+    lastentry = nametable + entrysize * (code->name_count - 1);
+    first = last = entry;
+    while (first > nametable)
+      {
+      if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
+      first -= entrysize;
+      }
+    while (last < lastentry)
+      {
+      if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
+      last += entrysize;
+      }
+    if (firstptr == NULL) return (first == last)?
+      (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
+    *firstptr = first;
+    *lastptr = last;
+    return entrysize;
+    }
+  if (c > 0) bot = mid + 1; else top = mid;
+  }
+
+return PCRE2_ERROR_NOSUBSTRING;
+}
+
+
+/*************************************************
+*           Find number for named string         *
+*************************************************/
+
+/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
+when it is known that names are unique. If there are duplicate names, it is not
+defined which number is returned.
+
+Arguments:
+  code        the compiled regex
+  stringname  the name whose number is required
+
+Returns:      the number of the named parenthesis, or a negative number
+                PCRE2_ERROR_NOSUBSTRING if not found
+                PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
+*/
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_substring_number_from_name(const pcre2_code *code,
+  PCRE2_SPTR stringname)
+{
+return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
+}
+
+/* End of pcre2_substring.c */
--- a/third_party/pcre/pcre2_tables.c
+++ b/third_party/pcre/pcre2_tables.c
@ -0,0 +1,234 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains some fixed tables that are used by more than one of the
+PCRE2 code modules. The tables are also #included by the pcre2test program,
+which uses macros to change their names from _pcre2_xxx to xxxx, thereby
+avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
+defined. */
+
+#ifndef PCRE2_PCRE2TEST           /* We're compiling the library */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
+the definition is next to the definition of the opcodes in pcre2_internal.h.
+This is mode-dependent, so it is skipped when this file is included by
+pcre2test. */
+
+#ifndef PCRE2_PCRE2TEST
+const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
+#endif
+
+/* Tables of horizontal and vertical whitespace characters, suitable for
+adding to classes. */
+
+const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
+const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
+
+/* These tables are the pairs of delimiters that are valid for callout string
+arguments. For each starting delimiter there must be a matching ending
+delimiter, which in fact is different only for bracket-like delimiters. */
+
+const uint32_t PRIV(callout_start_delims)[] = {
+  CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+  CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+  CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
+
+const uint32_t PRIV(callout_end_delims[]) = {
+  CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+  CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+  CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
+
+
+/*************************************************
+*           Tables for UTF-8 support             *
+*************************************************/
+
+/* These tables are required by pcre2test in 16- or 32-bit mode, as well
+as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
+handling wide characters. */
+
+#if defined PCRE2_PCRE2TEST || \
+   (defined SUPPORT_UNICODE && \
+    defined PCRE2_CODE_UNIT_WIDTH && \
+    PCRE2_CODE_UNIT_WIDTH == 8)
+
+/* These are the breakpoints for different numbers of bytes in a UTF-8
+character. */
+
+const int PRIV(utf8_table1)[] =
+  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
+
+const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
+
+/* These are the indicator bits and the mask for the data bits to set in the
+first byte of a character, indexed by the number of additional bytes. */
+
+const int PRIV(utf8_table2)[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+
+/* Table of the number of extra bytes, indexed by the first byte masked with
+0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
+
+const uint8_t PRIV(utf8_table4)[] = {
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+
+#endif /* UTF-8 support needed */
+
+/* Tables concerned with Unicode properties are relevant only when Unicode
+support is enabled. See also the pcre2_ucptables.c file, which is generated by
+a Python script from Unicode data files. */
+
+#ifdef SUPPORT_UNICODE
+
+/* Table to translate from particular type value to the general value. */
+
+const uint32_t PRIV(ucp_gentype)[] = {
+  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
+  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
+  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
+  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
+  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
+  ucp_P, ucp_P,                       /* Ps, Po */
+  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
+  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
+};
+
+/* This table encodes the rules for finding the end of an extended grapheme
+cluster. Every code point has a grapheme break property which is one of the
+ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
+10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
+code points. The left property selects a word from the table, and the right
+property selects a bit from that word like this:
+
+  PRIV(ucp_gbtable)[left-property] & (1u << right-property)
+
+The value is non-zero if a grapheme break is NOT permitted between the relevant
+two code points. The breaking rules are as follows:
+
+1. Break at the start and end of text (pretty obviously).
+
+2. Do not break between a CR and LF; otherwise, break before and after
+   controls.
+
+3. Do not break Hangul syllable sequences, the rules for which are:
+
+    L may be followed by L, V, LV or LVT
+    LV or V may be followed by V or T
+    LVT or T may be followed by T
+
+4. Do not break before extending characters or zero-width-joiner (ZWJ).
+
+The following rules are only for extended grapheme clusters (but that's what we
+are implementing).
+
+5. Do not break before SpacingMarks.
+
+6. Do not break after Prepend characters.
+
+7. Do not break within emoji modifier sequences or emoji zwj sequences. That
+   is, do not break between characters with the Extended_Pictographic property.
+   Extend and ZWJ characters are allowed between the characters; this cannot be
+   represented in this table, the code has to deal with it.
+
+8. Do not break within emoji flag sequences. That is, do not break between
+   regional indicator (RI) symbols if there are an odd number of RI characters
+   before the break point. This table encodes "join RI characters"; the code
+   has to deal with checking for previous adjoining RIs.
+
+9. Otherwise, break everywhere.
+*/
+
+#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
+
+const uint32_t PRIV(ucp_gbtable)[] = {
+   (1u<<ucp_gbLF),                                      /*  0 CR */
+   0,                                                   /*  1 LF */
+   0,                                                   /*  2 Control */
+   ESZ,                                                 /*  3 Extend */
+   ESZ|(1u<<ucp_gbPrepend)|                             /*  4 Prepend */
+       (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
+       (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
+       (1u<<ucp_gbRegional_Indicator),
+   ESZ,                                                 /*  5 SpacingMark */
+   ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)|      /*  6 L */
+       (1u<<ucp_gbLVT),
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  7 V */
+   ESZ|(1u<<ucp_gbT),                                   /*  8 T */
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  9 LV */
+   ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
+   (1u<<ucp_gbRegional_Indicator),                      /* 11 Regional Indicator */
+   ESZ,                                                 /* 12 Other */
+   ESZ,                                                 /* 13 ZWJ */
+   ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
+};
+
+#undef ESZ
+
+#ifdef SUPPORT_JIT
+/* This table reverses PRIV(ucp_gentype). We can save the cost
+of a memory load. */
+
+const int PRIV(ucp_typerange)[] = {
+  ucp_Cc, ucp_Cs,
+  ucp_Ll, ucp_Lu,
+  ucp_Mc, ucp_Mn,
+  ucp_Nd, ucp_No,
+  ucp_Pc, ucp_Ps,
+  ucp_Sc, ucp_So,
+  ucp_Zl, ucp_Zs,
+};
+#endif /* SUPPORT_JIT */
+
+/* Finally, include the tables that are auto-generated from the Unicode data
+files. */
+
+#include "pcre2_ucptables.inc"
+
+#endif /* SUPPORT_UNICODE */
+
+/* End of pcre2_tables.c */
--- a/third_party/pcre/pcre2_tables.inc
+++ b/third_party/pcre/pcre2_tables.inc
@ -0,0 +1,234 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains some fixed tables that are used by more than one of the
+PCRE2 code modules. The tables are also #included by the pcre2test program,
+which uses macros to change their names from _pcre2_xxx to xxxx, thereby
+avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
+defined. */
+
+#ifndef PCRE2_PCRE2TEST           /* We're compiling the library */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
+the definition is next to the definition of the opcodes in pcre2_internal.h.
+This is mode-dependent, so it is skipped when this file is included by
+pcre2test. */
+
+#ifndef PCRE2_PCRE2TEST
+const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
+#endif
+
+/* Tables of horizontal and vertical whitespace characters, suitable for
+adding to classes. */
+
+const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
+const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
+
+/* These tables are the pairs of delimiters that are valid for callout string
+arguments. For each starting delimiter there must be a matching ending
+delimiter, which in fact is different only for bracket-like delimiters. */
+
+const uint32_t PRIV(callout_start_delims)[] = {
+  CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+  CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+  CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
+
+const uint32_t PRIV(callout_end_delims[]) = {
+  CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+  CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+  CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
+
+
+/*************************************************
+*           Tables for UTF-8 support             *
+*************************************************/
+
+/* These tables are required by pcre2test in 16- or 32-bit mode, as well
+as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
+handling wide characters. */
+
+#if defined PCRE2_PCRE2TEST || \
+   (defined SUPPORT_UNICODE && \
+    defined PCRE2_CODE_UNIT_WIDTH && \
+    PCRE2_CODE_UNIT_WIDTH == 8)
+
+/* These are the breakpoints for different numbers of bytes in a UTF-8
+character. */
+
+const int PRIV(utf8_table1)[] =
+  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
+
+const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
+
+/* These are the indicator bits and the mask for the data bits to set in the
+first byte of a character, indexed by the number of additional bytes. */
+
+const int PRIV(utf8_table2)[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+
+/* Table of the number of extra bytes, indexed by the first byte masked with
+0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
+
+const uint8_t PRIV(utf8_table4)[] = {
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+
+#endif /* UTF-8 support needed */
+
+/* Tables concerned with Unicode properties are relevant only when Unicode
+support is enabled. See also the pcre2_ucptables.c file, which is generated by
+a Python script from Unicode data files. */
+
+#ifdef SUPPORT_UNICODE
+
+/* Table to translate from particular type value to the general value. */
+
+const uint32_t PRIV(ucp_gentype)[] = {
+  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
+  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
+  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
+  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
+  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
+  ucp_P, ucp_P,                       /* Ps, Po */
+  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
+  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
+};
+
+/* This table encodes the rules for finding the end of an extended grapheme
+cluster. Every code point has a grapheme break property which is one of the
+ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
+10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
+code points. The left property selects a word from the table, and the right
+property selects a bit from that word like this:
+
+  PRIV(ucp_gbtable)[left-property] & (1u << right-property)
+
+The value is non-zero if a grapheme break is NOT permitted between the relevant
+two code points. The breaking rules are as follows:
+
+1. Break at the start and end of text (pretty obviously).
+
+2. Do not break between a CR and LF; otherwise, break before and after
+   controls.
+
+3. Do not break Hangul syllable sequences, the rules for which are:
+
+    L may be followed by L, V, LV or LVT
+    LV or V may be followed by V or T
+    LVT or T may be followed by T
+
+4. Do not break before extending characters or zero-width-joiner (ZWJ).
+
+The following rules are only for extended grapheme clusters (but that's what we
+are implementing).
+
+5. Do not break before SpacingMarks.
+
+6. Do not break after Prepend characters.
+
+7. Do not break within emoji modifier sequences or emoji zwj sequences. That
+   is, do not break between characters with the Extended_Pictographic property.
+   Extend and ZWJ characters are allowed between the characters; this cannot be
+   represented in this table, the code has to deal with it.
+
+8. Do not break within emoji flag sequences. That is, do not break between
+   regional indicator (RI) symbols if there are an odd number of RI characters
+   before the break point. This table encodes "join RI characters"; the code
+   has to deal with checking for previous adjoining RIs.
+
+9. Otherwise, break everywhere.
+*/
+
+#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
+
+const uint32_t PRIV(ucp_gbtable)[] = {
+   (1u<<ucp_gbLF),                                      /*  0 CR */
+   0,                                                   /*  1 LF */
+   0,                                                   /*  2 Control */
+   ESZ,                                                 /*  3 Extend */
+   ESZ|(1u<<ucp_gbPrepend)|                             /*  4 Prepend */
+       (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
+       (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
+       (1u<<ucp_gbRegional_Indicator),
+   ESZ,                                                 /*  5 SpacingMark */
+   ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)|      /*  6 L */
+       (1u<<ucp_gbLVT),
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  7 V */
+   ESZ|(1u<<ucp_gbT),                                   /*  8 T */
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  9 LV */
+   ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
+   (1u<<ucp_gbRegional_Indicator),                      /* 11 Regional Indicator */
+   ESZ,                                                 /* 12 Other */
+   ESZ,                                                 /* 13 ZWJ */
+   ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
+};
+
+#undef ESZ
+
+#ifdef SUPPORT_JIT
+/* This table reverses PRIV(ucp_gentype). We can save the cost
+of a memory load. */
+
+const int PRIV(ucp_typerange)[] = {
+  ucp_Cc, ucp_Cs,
+  ucp_Ll, ucp_Lu,
+  ucp_Mc, ucp_Mn,
+  ucp_Nd, ucp_No,
+  ucp_Pc, ucp_Ps,
+  ucp_Sc, ucp_So,
+  ucp_Zl, ucp_Zs,
+};
+#endif /* SUPPORT_JIT */
+
+/* Finally, include the tables that are auto-generated from the Unicode data
+files. */
+
+#include "pcre2_ucptables.inc"
+
+#endif /* SUPPORT_UNICODE */
+
+/* End of pcre2_tables.c */
--- a/third_party/pcre/pcre2_ucd.c
+++ b/third_party/pcre/pcre2_ucd.c
--- a/third_party/pcre/pcre2_ucd.inc
+++ b/third_party/pcre/pcre2_ucd.inc
--- a/third_party/pcre/pcre2_ucp.h
+++ b/third_party/pcre/pcre2_ucp.h
@ -0,0 +1,394 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
+a new version of this code.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
+#define PCRE2_UCP_H_IDEMPOTENT_GUARD
+
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
+
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
+
+/* These are the general character categories. */
+
+enum {
+  ucp_C,
+  ucp_L,
+  ucp_M,
+  ucp_N,
+  ucp_P,
+  ucp_S,
+  ucp_Z,
+};
+
+/* These are the particular character categories. */
+
+enum {
+  ucp_Cc,    /* Control */
+  ucp_Cf,    /* Format */
+  ucp_Cn,    /* Unassigned */
+  ucp_Co,    /* Private use */
+  ucp_Cs,    /* Surrogate */
+  ucp_Ll,    /* Lower case letter */
+  ucp_Lm,    /* Modifier letter */
+  ucp_Lo,    /* Other letter */
+  ucp_Lt,    /* Title case letter */
+  ucp_Lu,    /* Upper case letter */
+  ucp_Mc,    /* Spacing mark */
+  ucp_Me,    /* Enclosing mark */
+  ucp_Mn,    /* Non-spacing mark */
+  ucp_Nd,    /* Decimal number */
+  ucp_Nl,    /* Letter number */
+  ucp_No,    /* Other number */
+  ucp_Pc,    /* Connector punctuation */
+  ucp_Pd,    /* Dash punctuation */
+  ucp_Pe,    /* Close punctuation */
+  ucp_Pf,    /* Final punctuation */
+  ucp_Pi,    /* Initial punctuation */
+  ucp_Po,    /* Other punctuation */
+  ucp_Ps,    /* Open punctuation */
+  ucp_Sc,    /* Currency symbol */
+  ucp_Sk,    /* Modifier symbol */
+  ucp_Sm,    /* Mathematical symbol */
+  ucp_So,    /* Other symbol */
+  ucp_Zl,    /* Line separator */
+  ucp_Zp,    /* Paragraph separator */
+  ucp_Zs,    /* Space separator */
+};
+
+/* These are Boolean properties. */
+
+enum {
+  ucp_ASCII,
+  ucp_ASCII_Hex_Digit,
+  ucp_Alphabetic,
+  ucp_Bidi_Control,
+  ucp_Bidi_Mirrored,
+  ucp_Case_Ignorable,
+  ucp_Cased,
+  ucp_Changes_When_Casefolded,
+  ucp_Changes_When_Casemapped,
+  ucp_Changes_When_Lowercased,
+  ucp_Changes_When_Titlecased,
+  ucp_Changes_When_Uppercased,
+  ucp_Dash,
+  ucp_Default_Ignorable_Code_Point,
+  ucp_Deprecated,
+  ucp_Diacritic,
+  ucp_Emoji,
+  ucp_Emoji_Component,
+  ucp_Emoji_Modifier,
+  ucp_Emoji_Modifier_Base,
+  ucp_Emoji_Presentation,
+  ucp_Extended_Pictographic,
+  ucp_Extender,
+  ucp_Grapheme_Base,
+  ucp_Grapheme_Extend,
+  ucp_Grapheme_Link,
+  ucp_Hex_Digit,
+  ucp_IDS_Binary_Operator,
+  ucp_IDS_Trinary_Operator,
+  ucp_ID_Continue,
+  ucp_ID_Start,
+  ucp_Ideographic,
+  ucp_Join_Control,
+  ucp_Logical_Order_Exception,
+  ucp_Lowercase,
+  ucp_Math,
+  ucp_Noncharacter_Code_Point,
+  ucp_Pattern_Syntax,
+  ucp_Pattern_White_Space,
+  ucp_Prepended_Concatenation_Mark,
+  ucp_Quotation_Mark,
+  ucp_Radical,
+  ucp_Regional_Indicator,
+  ucp_Sentence_Terminal,
+  ucp_Soft_Dotted,
+  ucp_Terminal_Punctuation,
+  ucp_Unified_Ideograph,
+  ucp_Uppercase,
+  ucp_Variation_Selector,
+  ucp_White_Space,
+  ucp_XID_Continue,
+  ucp_XID_Start,
+  /* This must be last */
+  ucp_Bprop_Count
+};
+
+/* Size of entries in ucd_boolprop_sets[] */
+
+#define ucd_boolprop_sets_item_size 2
+
+/* These are the bidi class values. */
+
+enum {
+  ucp_bidiAL,   /* Arabic letter */
+  ucp_bidiAN,   /* Arabic number */
+  ucp_bidiB,    /* Paragraph separator */
+  ucp_bidiBN,   /* Boundary neutral */
+  ucp_bidiCS,   /* Common separator */
+  ucp_bidiEN,   /* European number */
+  ucp_bidiES,   /* European separator */
+  ucp_bidiET,   /* European terminator */
+  ucp_bidiFSI,  /* First strong isolate */
+  ucp_bidiL,    /* Left to right */
+  ucp_bidiLRE,  /* Left to right embedding */
+  ucp_bidiLRI,  /* Left to right isolate */
+  ucp_bidiLRO,  /* Left to right override */
+  ucp_bidiNSM,  /* Non-spacing mark */
+  ucp_bidiON,   /* Other neutral */
+  ucp_bidiPDF,  /* Pop directional format */
+  ucp_bidiPDI,  /* Pop directional isolate */
+  ucp_bidiR,    /* Right to left */
+  ucp_bidiRLE,  /* Right to left embedding */
+  ucp_bidiRLI,  /* Right to left isolate */
+  ucp_bidiRLO,  /* Right to left override */
+  ucp_bidiS,    /* Segment separator */
+  ucp_bidiWS,   /* White space */
+};
+
+/* These are grapheme break properties. The Extended Pictographic property
+comes from the emoji-data.txt file. */
+
+enum {
+  ucp_gbCR,                    /*  0 */
+  ucp_gbLF,                    /*  1 */
+  ucp_gbControl,               /*  2 */
+  ucp_gbExtend,                /*  3 */
+  ucp_gbPrepend,               /*  4 */
+  ucp_gbSpacingMark,           /*  5 */
+  ucp_gbL,                     /*  6 Hangul syllable type L */
+  ucp_gbV,                     /*  7 Hangul syllable type V */
+  ucp_gbT,                     /*  8 Hangul syllable type T */
+  ucp_gbLV,                    /*  9 Hangul syllable type LV */
+  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
+  ucp_gbRegional_Indicator,    /* 11 */
+  ucp_gbOther,                 /* 12 */
+  ucp_gbZWJ,                   /* 13 */
+  ucp_gbExtended_Pictographic, /* 14 */
+};
+
+/* These are the script identifications. */
+
+enum {
+  /* Scripts which has characters in other scripts. */
+  ucp_Latin,
+  ucp_Greek,
+  ucp_Cyrillic,
+  ucp_Arabic,
+  ucp_Syriac,
+  ucp_Thaana,
+  ucp_Devanagari,
+  ucp_Bengali,
+  ucp_Gurmukhi,
+  ucp_Gujarati,
+  ucp_Oriya,
+  ucp_Tamil,
+  ucp_Telugu,
+  ucp_Kannada,
+  ucp_Malayalam,
+  ucp_Sinhala,
+  ucp_Myanmar,
+  ucp_Georgian,
+  ucp_Hangul,
+  ucp_Mongolian,
+  ucp_Hiragana,
+  ucp_Katakana,
+  ucp_Bopomofo,
+  ucp_Han,
+  ucp_Yi,
+  ucp_Tagalog,
+  ucp_Hanunoo,
+  ucp_Buhid,
+  ucp_Tagbanwa,
+  ucp_Limbu,
+  ucp_Tai_Le,
+  ucp_Linear_B,
+  ucp_Cypriot,
+  ucp_Buginese,
+  ucp_Coptic,
+  ucp_Glagolitic,
+  ucp_Syloti_Nagri,
+  ucp_Phags_Pa,
+  ucp_Nko,
+  ucp_Kayah_Li,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Mandaic,
+  ucp_Chakma,
+  ucp_Sharada,
+  ucp_Takri,
+  ucp_Duployan,
+  ucp_Grantha,
+  ucp_Khojki,
+  ucp_Linear_A,
+  ucp_Mahajani,
+  ucp_Manichaean,
+  ucp_Modi,
+  ucp_Old_Permic,
+  ucp_Psalter_Pahlavi,
+  ucp_Khudawadi,
+  ucp_Tirhuta,
+  ucp_Multani,
+  ucp_Adlam,
+  ucp_Masaram_Gondi,
+  ucp_Dogra,
+  ucp_Gunjala_Gondi,
+  ucp_Hanifi_Rohingya,
+  ucp_Sogdian,
+  ucp_Nandinagari,
+  ucp_Yezidi,
+  ucp_Cypro_Minoan,
+  ucp_Old_Uyghur,
+
+  /* Scripts which has no characters in other scripts. */
+  ucp_Unknown,
+  ucp_Common,
+  ucp_Armenian,
+  ucp_Hebrew,
+  ucp_Thai,
+  ucp_Lao,
+  ucp_Tibetan,
+  ucp_Ethiopic,
+  ucp_Cherokee,
+  ucp_Canadian_Aboriginal,
+  ucp_Ogham,
+  ucp_Runic,
+  ucp_Khmer,
+  ucp_Old_Italic,
+  ucp_Gothic,
+  ucp_Deseret,
+  ucp_Inherited,
+  ucp_Ugaritic,
+  ucp_Shavian,
+  ucp_Osmanya,
+  ucp_Braille,
+  ucp_New_Tai_Lue,
+  ucp_Tifinagh,
+  ucp_Old_Persian,
+  ucp_Kharoshthi,
+  ucp_Balinese,
+  ucp_Cuneiform,
+  ucp_Phoenician,
+  ucp_Sundanese,
+  ucp_Lepcha,
+  ucp_Ol_Chiki,
+  ucp_Vai,
+  ucp_Saurashtra,
+  ucp_Rejang,
+  ucp_Lycian,
+  ucp_Carian,
+  ucp_Lydian,
+  ucp_Cham,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
+  ucp_Avestan,
+  ucp_Egyptian_Hieroglyphs,
+  ucp_Samaritan,
+  ucp_Lisu,
+  ucp_Bamum,
+  ucp_Meetei_Mayek,
+  ucp_Imperial_Aramaic,
+  ucp_Old_South_Arabian,
+  ucp_Inscriptional_Parthian,
+  ucp_Inscriptional_Pahlavi,
+  ucp_Old_Turkic,
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sora_Sompeng,
+  ucp_Caucasian_Albanian,
+  ucp_Bassa_Vah,
+  ucp_Elbasan,
+  ucp_Pahawh_Hmong,
+  ucp_Mende_Kikakui,
+  ucp_Mro,
+  ucp_Old_North_Arabian,
+  ucp_Nabataean,
+  ucp_Palmyrene,
+  ucp_Pau_Cin_Hau,
+  ucp_Siddham,
+  ucp_Warang_Citi,
+  ucp_Ahom,
+  ucp_Anatolian_Hieroglyphs,
+  ucp_Hatran,
+  ucp_Old_Hungarian,
+  ucp_SignWriting,
+  ucp_Bhaiksuki,
+  ucp_Marchen,
+  ucp_Newa,
+  ucp_Osage,
+  ucp_Tangut,
+  ucp_Nushu,
+  ucp_Soyombo,
+  ucp_Zanabazar_Square,
+  ucp_Makasar,
+  ucp_Medefaidrin,
+  ucp_Old_Sogdian,
+  ucp_Elymaic,
+  ucp_Nyiakeng_Puachue_Hmong,
+  ucp_Wancho,
+  ucp_Chorasmian,
+  ucp_Dives_Akuru,
+  ucp_Khitan_Small_Script,
+  ucp_Tangsa,
+  ucp_Toto,
+  ucp_Vithkuqi,
+
+  /* This must be last */
+  ucp_Script_Count
+};
+
+/* Size of entries in ucd_script_sets[] */
+
+#define ucd_script_sets_item_size 3
+
+#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_ucp.h */
--- a/third_party/pcre/pcre2_ucptables.inc
+++ b/third_party/pcre/pcre2_ucptables.inc
--- a/third_party/pcre/pcre2_valid_utf.c
+++ b/third_party/pcre/pcre2_valid_utf.c
@ -0,0 +1,398 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function for validating UTF character
+strings. This file is also #included by the pcre2test program, which uses
+macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
+with the library. In this case, PCRE2_PCRE2TEST is defined. */
+
+#ifndef PCRE2_PCRE2TEST           /* We're compiling the library */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+
+#ifndef SUPPORT_UNICODE
+/*************************************************
+*  Dummy function when Unicode is not supported  *
+*************************************************/
+
+/* This function should never be called when Unicode is not supported. */
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
+{
+(void)string;
+(void)length;
+(void)erroroffset;
+return 0;
+}
+#else  /* UTF is supported */
+
+
+
+/*************************************************
+*           Validate a UTF string                *
+*************************************************/
+
+/* This function is called (optionally) at the start of compile or match, to
+check that a supposed UTF string is actually valid. The early check means
+that subsequent code can assume it is dealing with a valid string. The check
+can be turned off for maximum performance, but the consequences of supplying an
+invalid string are then undefined.
+
+Arguments:
+  string       points to the string
+  length       length of string
+  errp         pointer to an error position offset variable
+
+Returns:       == 0    if the string is a valid UTF string
+               != 0    otherwise, setting the offset of the bad character
+*/
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
+{
+PCRE2_SPTR p;
+uint32_t c;
+
+/* ----------------- Check a UTF-8 string ----------------- */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+/* Originally, this function checked according to RFC 2279, allowing for values
+in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
+in the canonical format. Once somebody had pointed out RFC 3629 to me (it
+obsoletes 2279), additional restrictions were applied. The values are now
+limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
+subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
+characters is still checked. Error returns are as follows:
+
+PCRE2_ERROR_UTF8_ERR1   Missing 1 byte at the end of the string
+PCRE2_ERROR_UTF8_ERR2   Missing 2 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR3   Missing 3 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR4   Missing 4 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR5   Missing 5 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR6   2nd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR7   3rd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR8   4th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR9   5th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR10  6th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR11  5-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR12  6-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR13  4-byte character with value > 0x10ffff is not permitted
+PCRE2_ERROR_UTF8_ERR14  3-byte character with value 0xd800-0xdfff is not permitted
+PCRE2_ERROR_UTF8_ERR15  Overlong 2-byte sequence
+PCRE2_ERROR_UTF8_ERR16  Overlong 3-byte sequence
+PCRE2_ERROR_UTF8_ERR17  Overlong 4-byte sequence
+PCRE2_ERROR_UTF8_ERR18  Overlong 5-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
+PCRE2_ERROR_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff
+*/
+
+for (p = string; length > 0; p++)
+  {
+  uint32_t ab, d;
+
+  c = *p;
+  length--;
+
+  if (c < 128) continue;                /* ASCII character */
+
+  if (c < 0xc0)                         /* Isolated 10xx xxxx byte */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    return PCRE2_ERROR_UTF8_ERR20;
+    }
+
+  if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    return PCRE2_ERROR_UTF8_ERR21;
+    }
+
+  ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes (1-5) */
+  if (length < ab)                      /* Missing bytes */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    switch(ab - length)
+      {
+      case 1: return PCRE2_ERROR_UTF8_ERR1;
+      case 2: return PCRE2_ERROR_UTF8_ERR2;
+      case 3: return PCRE2_ERROR_UTF8_ERR3;
+      case 4: return PCRE2_ERROR_UTF8_ERR4;
+      case 5: return PCRE2_ERROR_UTF8_ERR5;
+      }
+    }
+  length -= ab;                         /* Length remaining */
+
+  /* Check top bits in the second byte */
+
+  if (((d = *(++p)) & 0xc0) != 0x80)
+    {
+    *erroroffset = (int)(p - string) - 1;
+    return PCRE2_ERROR_UTF8_ERR6;
+    }
+
+  /* For each length, check that the remaining bytes start with the 0x80 bit
+  set and not the 0x40 bit. Then check for an overlong sequence, and for the
+  excluded range 0xd800 to 0xdfff. */
+
+  switch (ab)
+    {
+    /* 2-byte character. No further bytes to check for 0x80. Check first byte
+    for for xx00 000x (overlong sequence). */
+
+    case 1: if ((c & 0x3e) == 0)
+      {
+      *erroroffset = (int)(p - string) - 1;
+      return PCRE2_ERROR_UTF8_ERR15;
+      }
+    break;
+
+    /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
+      for 1110 0000, xx0x xxxx (overlong sequence) or
+          1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
+
+    case 2:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if (c == 0xe0 && (d & 0x20) == 0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR16;
+      }
+    if (c == 0xed && d >= 0xa0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR14;
+      }
+    break;
+
+    /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
+       bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
+       character greater than 0x0010ffff (f4 8f bf bf) */
+
+    case 3:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if (c == 0xf0 && (d & 0x30) == 0)
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR17;
+      }
+    if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR13;
+      }
+    break;
+
+    /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
+    rejected by the length test below. However, we do the appropriate tests
+    here so that overlong sequences get diagnosed, and also in case there is
+    ever an option for handling these larger code points. */
+
+    /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
+    1111 1000, xx00 0xxx */
+
+    case 4:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if (c == 0xf8 && (d & 0x38) == 0)
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR18;
+      }
+    break;
+
+    /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
+    1111 1100, xx00 00xx. */
+
+    case 5:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR10;
+      }
+    if (c == 0xfc && (d & 0x3c) == 0)
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR19;
+      }
+    break;
+    }
+
+  /* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
+  excluded by RFC 3629. The pointer p is currently at the last byte of the
+  character. */
+
+  if (ab > 3)
+    {
+    *erroroffset = (int)(p - string) - ab;
+    return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
+    }
+  }
+return 0;
+
+
+/* ----------------- Check a UTF-16 string ----------------- */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+/* There's not so much work, nor so many errors, for UTF-16.
+PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at the end of the string
+PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate
+PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+*/
+
+for (p = string; length > 0; p++)
+  {
+  c = *p;
+  length--;
+
+  if ((c & 0xf800) != 0xd800)
+    {
+    /* Normal UTF-16 code point. Neither high nor low surrogate. */
+    }
+  else if ((c & 0x0400) == 0)
+    {
+    /* High surrogate. Must be a followed by a low surrogate. */
+    if (length == 0)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF16_ERR1;
+      }
+    p++;
+    length--;
+    if ((*p & 0xfc00) != 0xdc00)
+      {
+      *erroroffset = p - string - 1;
+      return PCRE2_ERROR_UTF16_ERR2;
+      }
+    }
+  else
+    {
+    /* Isolated low surrogate. Always an error. */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF16_ERR3;
+    }
+  }
+return 0;
+
+
+
+/* ----------------- Check a UTF-32 string ----------------- */
+
+#else
+
+/* There is very little to do for a UTF-32 string.
+PCRE2_ERROR_UTF32_ERR1  Surrogate character
+PCRE2_ERROR_UTF32_ERR2  Character > 0x10ffff
+*/
+
+for (p = string; length > 0; length--, p++)
+  {
+  c = *p;
+  if ((c & 0xfffff800u) != 0xd800u)
+    {
+    /* Normal UTF-32 code point. Neither high nor low surrogate. */
+    if (c > 0x10ffffu)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF32_ERR2;
+      }
+    }
+  else
+    {
+    /* A surrogate */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF32_ERR1;
+    }
+  }
+return 0;
+#endif  /* CODE_UNIT_WIDTH */
+}
+#endif  /* SUPPORT_UNICODE */
+
+/* End of pcre2_valid_utf.c */
--- a/third_party/pcre/pcre2_valid_utf.inc
+++ b/third_party/pcre/pcre2_valid_utf.inc
@ -0,0 +1,398 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains an internal function for validating UTF character
+strings. This file is also #included by the pcre2test program, which uses
+macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
+with the library. In this case, PCRE2_PCRE2TEST is defined. */
+
+#ifndef PCRE2_PCRE2TEST           /* We're compiling the library */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "pcre2_internal.h"
+#endif /* PCRE2_PCRE2TEST */
+
+
+#ifndef SUPPORT_UNICODE
+/*************************************************
+*  Dummy function when Unicode is not supported  *
+*************************************************/
+
+/* This function should never be called when Unicode is not supported. */
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
+{
+(void)string;
+(void)length;
+(void)erroroffset;
+return 0;
+}
+#else  /* UTF is supported */
+
+
+
+/*************************************************
+*           Validate a UTF string                *
+*************************************************/
+
+/* This function is called (optionally) at the start of compile or match, to
+check that a supposed UTF string is actually valid. The early check means
+that subsequent code can assume it is dealing with a valid string. The check
+can be turned off for maximum performance, but the consequences of supplying an
+invalid string are then undefined.
+
+Arguments:
+  string       points to the string
+  length       length of string
+  errp         pointer to an error position offset variable
+
+Returns:       == 0    if the string is a valid UTF string
+               != 0    otherwise, setting the offset of the bad character
+*/
+
+int
+PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
+{
+PCRE2_SPTR p;
+uint32_t c;
+
+/* ----------------- Check a UTF-8 string ----------------- */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+
+/* Originally, this function checked according to RFC 2279, allowing for values
+in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
+in the canonical format. Once somebody had pointed out RFC 3629 to me (it
+obsoletes 2279), additional restrictions were applied. The values are now
+limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
+subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
+characters is still checked. Error returns are as follows:
+
+PCRE2_ERROR_UTF8_ERR1   Missing 1 byte at the end of the string
+PCRE2_ERROR_UTF8_ERR2   Missing 2 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR3   Missing 3 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR4   Missing 4 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR5   Missing 5 bytes at the end of the string
+PCRE2_ERROR_UTF8_ERR6   2nd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR7   3rd-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR8   4th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR9   5th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR10  6th-byte's two top bits are not 0x80
+PCRE2_ERROR_UTF8_ERR11  5-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR12  6-byte character is not permitted by RFC 3629
+PCRE2_ERROR_UTF8_ERR13  4-byte character with value > 0x10ffff is not permitted
+PCRE2_ERROR_UTF8_ERR14  3-byte character with value 0xd800-0xdfff is not permitted
+PCRE2_ERROR_UTF8_ERR15  Overlong 2-byte sequence
+PCRE2_ERROR_UTF8_ERR16  Overlong 3-byte sequence
+PCRE2_ERROR_UTF8_ERR17  Overlong 4-byte sequence
+PCRE2_ERROR_UTF8_ERR18  Overlong 5-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
+PCRE2_ERROR_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
+PCRE2_ERROR_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff
+*/
+
+for (p = string; length > 0; p++)
+  {
+  uint32_t ab, d;
+
+  c = *p;
+  length--;
+
+  if (c < 128) continue;                /* ASCII character */
+
+  if (c < 0xc0)                         /* Isolated 10xx xxxx byte */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    return PCRE2_ERROR_UTF8_ERR20;
+    }
+
+  if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    return PCRE2_ERROR_UTF8_ERR21;
+    }
+
+  ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes (1-5) */
+  if (length < ab)                      /* Missing bytes */
+    {
+    *erroroffset = (PCRE2_SIZE)(p - string);
+    switch(ab - length)
+      {
+      case 1: return PCRE2_ERROR_UTF8_ERR1;
+      case 2: return PCRE2_ERROR_UTF8_ERR2;
+      case 3: return PCRE2_ERROR_UTF8_ERR3;
+      case 4: return PCRE2_ERROR_UTF8_ERR4;
+      case 5: return PCRE2_ERROR_UTF8_ERR5;
+      }
+    }
+  length -= ab;                         /* Length remaining */
+
+  /* Check top bits in the second byte */
+
+  if (((d = *(++p)) & 0xc0) != 0x80)
+    {
+    *erroroffset = (int)(p - string) - 1;
+    return PCRE2_ERROR_UTF8_ERR6;
+    }
+
+  /* For each length, check that the remaining bytes start with the 0x80 bit
+  set and not the 0x40 bit. Then check for an overlong sequence, and for the
+  excluded range 0xd800 to 0xdfff. */
+
+  switch (ab)
+    {
+    /* 2-byte character. No further bytes to check for 0x80. Check first byte
+    for for xx00 000x (overlong sequence). */
+
+    case 1: if ((c & 0x3e) == 0)
+      {
+      *erroroffset = (int)(p - string) - 1;
+      return PCRE2_ERROR_UTF8_ERR15;
+      }
+    break;
+
+    /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
+      for 1110 0000, xx0x xxxx (overlong sequence) or
+          1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
+
+    case 2:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if (c == 0xe0 && (d & 0x20) == 0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR16;
+      }
+    if (c == 0xed && d >= 0xa0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR14;
+      }
+    break;
+
+    /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
+       bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
+       character greater than 0x0010ffff (f4 8f bf bf) */
+
+    case 3:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if (c == 0xf0 && (d & 0x30) == 0)
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR17;
+      }
+    if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR13;
+      }
+    break;
+
+    /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
+    rejected by the length test below. However, we do the appropriate tests
+    here so that overlong sequences get diagnosed, and also in case there is
+    ever an option for handling these larger code points. */
+
+    /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
+    1111 1000, xx00 0xxx */
+
+    case 4:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if (c == 0xf8 && (d & 0x38) == 0)
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR18;
+      }
+    break;
+
+    /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
+    1111 1100, xx00 00xx. */
+
+    case 5:
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE2_ERROR_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE2_ERROR_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE2_ERROR_UTF8_ERR9;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR10;
+      }
+    if (c == 0xfc && (d & 0x3c) == 0)
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE2_ERROR_UTF8_ERR19;
+      }
+    break;
+    }
+
+  /* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
+  excluded by RFC 3629. The pointer p is currently at the last byte of the
+  character. */
+
+  if (ab > 3)
+    {
+    *erroroffset = (int)(p - string) - ab;
+    return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
+    }
+  }
+return 0;
+
+
+/* ----------------- Check a UTF-16 string ----------------- */
+
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+
+/* There's not so much work, nor so many errors, for UTF-16.
+PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at the end of the string
+PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate
+PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+*/
+
+for (p = string; length > 0; p++)
+  {
+  c = *p;
+  length--;
+
+  if ((c & 0xf800) != 0xd800)
+    {
+    /* Normal UTF-16 code point. Neither high nor low surrogate. */
+    }
+  else if ((c & 0x0400) == 0)
+    {
+    /* High surrogate. Must be a followed by a low surrogate. */
+    if (length == 0)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF16_ERR1;
+      }
+    p++;
+    length--;
+    if ((*p & 0xfc00) != 0xdc00)
+      {
+      *erroroffset = p - string - 1;
+      return PCRE2_ERROR_UTF16_ERR2;
+      }
+    }
+  else
+    {
+    /* Isolated low surrogate. Always an error. */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF16_ERR3;
+    }
+  }
+return 0;
+
+
+
+/* ----------------- Check a UTF-32 string ----------------- */
+
+#else
+
+/* There is very little to do for a UTF-32 string.
+PCRE2_ERROR_UTF32_ERR1  Surrogate character
+PCRE2_ERROR_UTF32_ERR2  Character > 0x10ffff
+*/
+
+for (p = string; length > 0; length--, p++)
+  {
+  c = *p;
+  if ((c & 0xfffff800u) != 0xd800u)
+    {
+    /* Normal UTF-32 code point. Neither high nor low surrogate. */
+    if (c > 0x10ffffu)
+      {
+      *erroroffset = p - string;
+      return PCRE2_ERROR_UTF32_ERR2;
+      }
+    }
+  else
+    {
+    /* A surrogate */
+    *erroroffset = p - string;
+    return PCRE2_ERROR_UTF32_ERR1;
+    }
+  }
+return 0;
+#endif  /* CODE_UNIT_WIDTH */
+}
+#endif  /* SUPPORT_UNICODE */
+
+/* End of pcre2_valid_utf.c */
--- a/third_party/pcre/pcre2_xclass.c
+++ b/third_party/pcre/pcre2_xclass.c
@ -0,0 +1,289 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match an extended
+class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
+pcre2_def_match(). */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+/*************************************************
+*       Match character against an XCLASS        *
+*************************************************/
+
+/* This function is called to match a character against an extended class that
+might contain codepoints above 255 and/or Unicode properties.
+
+Arguments:
+  c           the character
+  data        points to the flag code unit of the XCLASS data
+  utf         TRUE if in UTF mode
+
+Returns:      TRUE if character matches, else FALSE
+*/
+
+BOOL
+PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
+{
+PCRE2_UCHAR t;
+BOOL negated = (*data & XCL_NOT) != 0;
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
+/* Code points < 256 are matched against a bitmap, if one is present. If not,
+we still carry on, because there may be ranges that start below 256 in the
+additional data. */
+
+if (c < 256)
+  {
+  if ((*data & XCL_HASPROP) == 0)
+    {
+    if ((*data & XCL_MAP) == 0) return negated;
+    return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0;
+    }
+  if ((*data & XCL_MAP) != 0 &&
+    (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0)
+    return !negated; /* char found */
+  }
+
+/* First skip the bit map if present. Then match against the list of Unicode
+properties or large chars or ranges that end with a large char. We won't ever
+encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
+
+if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
+
+while ((t = *data++) != XCL_END)
+  {
+  uint32_t x, y;
+  if (t == XCL_SINGLE)
+    {
+#ifdef SUPPORT_UNICODE
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+    x = *data++;
+    if (c == x) return !negated;
+    }
+  else if (t == XCL_RANGE)
+    {
+#ifdef SUPPORT_UNICODE
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      {
+      x = *data++;
+      y = *data++;
+      }
+    if (c >= x && c <= y) return !negated;
+    }
+
+#ifdef SUPPORT_UNICODE
+  else  /* XCL_PROP & XCL_NOTPROP */
+    {
+    const ucd_record *prop = GET_UCD(c);
+    BOOL isprop = t == XCL_PROP;
+    BOOL ok;
+
+    switch(*data)
+      {
+      case PT_ANY:
+      if (isprop) return !negated;
+      break;
+
+      case PT_LAMP:
+      if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
+           prop->chartype == ucp_Lt) == isprop) return !negated;
+      break;
+
+      case PT_GC:
+      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
+        return !negated;
+      break;
+
+      case PT_PC:
+      if ((data[1] == prop->chartype) == isprop) return !negated;
+      break;
+
+      case PT_SC:
+      if ((data[1] == prop->script) == isprop) return !negated;
+      break;
+
+      case PT_SCX:
+      ok = (data[1] == prop->script ||
+            MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
+      if (ok == isprop) return !negated;
+      break;
+
+      case PT_ALNUM:
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
+        return !negated;
+      break;
+
+      /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+      which means that Perl space and POSIX space are now identical. PCRE
+      was changed at release 8.34. */
+
+      case PT_SPACE:    /* Perl space */
+      case PT_PXSPACE:  /* POSIX space */
+      switch(c)
+        {
+        HSPACE_CASES:
+        VSPACE_CASES:
+        if (isprop) return !negated;
+        break;
+
+        default:
+        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
+          return !negated;
+        break;
+        }
+      break;
+
+      case PT_WORD:
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
+             == isprop)
+        return !negated;
+      break;
+
+      case PT_UCNC:
+      if (c < 0xa0)
+        {
+        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+             c == CHAR_GRAVE_ACCENT) == isprop)
+          return !negated;
+        }
+      else
+        {
+        if ((c < 0xd800 || c > 0xdfff) == isprop)
+          return !negated;
+        }
+      break;
+
+      case PT_BIDICL:
+      if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
+        return !negated;
+      break;
+
+      case PT_BOOL:
+      ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+        UCD_BPROPS_PROP(prop), data[1]) != 0;
+      if (ok == isprop) return !negated;
+      break;
+
+      /* The following three properties can occur only in an XCLASS, as there
+      is no \p or \P coding for them. */
+
+      /* Graphic character. Implement this as not Z (space or separator) and
+      not C (other), except for Cf (format) with a few exceptions. This seems
+      to be what Perl does. The exceptional characters are:
+
+      U+061C           Arabic Letter Mark
+      U+180E           Mongolian Vowel Separator
+      U+2066 - U+2069  Various "isolate"s
+      */
+
+      case PT_PXGRAPH:
+      if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
+            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
+              (prop->chartype == ucp_Cf &&
+                c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
+         )) == isprop)
+        return !negated;
+      break;
+
+      /* Printable character: same as graphic, with the addition of Zs, i.e.
+      not Zl and not Zp, and U+180E. */
+
+      case PT_PXPRINT:
+      if ((prop->chartype != ucp_Zl &&
+           prop->chartype != ucp_Zp &&
+            (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
+              (prop->chartype == ucp_Cf &&
+                c != 0x061c && (c < 0x2066 || c > 0x2069))
+         )) == isprop)
+        return !negated;
+      break;
+
+      /* Punctuation: all Unicode punctuation, plus ASCII characters that
+      Unicode treats as symbols rather than punctuation, for Perl
+      compatibility (these are $+<=>^`|~). */
+
+      case PT_PXPUNCT:
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
+            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+        return !negated;
+      break;
+
+      /* This should never occur, but compilers may mutter if there is no
+      default. */
+
+      default:
+      return FALSE;
+      }
+
+    data += 2;
+    }
+#else
+  (void)utf;  /* Avoid compiler warning */
+#endif  /* SUPPORT_UNICODE */
+  }
+
+return negated;   /* char did not match */
+}
+
+/* End of pcre2_xclass.c */
--- a/third_party/pcre/pcre2grep.1
+++ b/third_party/pcre/pcre2grep.1
@ -0,0 +1,975 @@
+.TH PCRE2GREP 1 "21 November 2022" "PCRE2 10.41"
+.SH NAME
+pcre2grep - a grep with Perl-compatible regular expressions.
+.SH SYNOPSIS
+.B pcre2grep [options] [long options] [pattern] [path1 path2 ...]
+.
+.SH DESCRIPTION
+.rs
+.sp
+\fBpcre2grep\fP searches files for character patterns, in the same way as other
+grep commands do, but it uses the PCRE2 regular expression library to support
+patterns that are compatible with the regular expressions of Perl 5. See
+.\" HREF
+\fBpcre2syntax\fP(3)
+.\"
+for a quick-reference summary of pattern syntax, or
+.\" HREF
+\fBpcre2pattern\fP(3)
+.\"
+for a full description of the syntax and semantics of the regular expressions
+that PCRE2 supports.
+.P
+Patterns, whether supplied on the command line or in a separate file, are given
+without delimiters. For example:
+.sp
+  pcre2grep Thursday /etc/motd
+.sp
+If you attempt to use delimiters (for example, by surrounding a pattern with
+slashes, as is common in Perl scripts), they are interpreted as part of the
+pattern. Quotes can of course be used to delimit patterns on the command line
+because they are interpreted by the shell, and indeed quotes are required if a
+pattern contains white space or shell metacharacters.
+.P
+The first argument that follows any option settings is treated as the single
+pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present.
+Conversely, when one or both of these options are used to specify patterns, all
+arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an
+argument pattern must be provided.
+.P
+If no files are specified, \fBpcre2grep\fP reads the standard input. The
+standard input can also be referenced by a name consisting of a single hyphen.
+For example:
+.sp
+  pcre2grep some-pattern file1 - file3
+.sp
+By default, input files are searched line by line. Each line that matches a
+pattern is copied to the standard output, and if there is more than one file,
+the file name is output at the start of each line, followed by a colon.
+However, there are options that can change how \fBpcre2grep\fP behaves. For
+example, the \fB-M\fP option makes it possible to search for strings that span
+line boundaries. What defines a line boundary is controlled by the \fB-N\fP
+(\fB--newline\fP) option. The \fB-h\fP and \fB-H\fP options control whether or
+not file names are shown, and the \fB-Z\fP option changes the file name
+terminator to a zero byte.
+.P
+The amount of memory used for buffering files that are being scanned is
+controlled by parameters that can be set by the \fB--buffer-size\fP and
+\fB--max-buffer-size\fP options. The first of these sets the size of buffer
+that is obtained at the start of processing. If an input file contains very
+long lines, a larger buffer may be needed; this is handled by automatically
+extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The
+default values for these parameters can be set when \fBpcre2grep\fP is
+built; if nothing is specified, the defaults are set to 20KiB and 1MiB
+respectively. An error occurs if a line is too long and the buffer can no
+longer be expanded.
+.P
+The block of memory that is actually used is three times the "buffer size", to
+allow for buffering "before" and "after" lines. If the buffer size is too
+small, fewer than requested "before" and "after" lines may be output.
+.P
+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
+BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
+(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
+each line in the order in which they are defined, except that all the \fB-e\fP
+patterns are tried before the \fB-f\fP patterns.
+.P
+By default, as soon as one pattern matches a line, no further patterns are
+considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the
+matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP,
+\fB--line-offsets\fP, or \fB--output\fP is used to output only the part of the
+line that matched (either shown literally, or as an offset), the behaviour is
+different. In this situation, all the patterns are applied to the line. If
+there is more than one match, the one that begins nearest to the start of the
+subject is processed; if there is more than one match at that position, the one
+with the longest matching substring is processed; if the matching substrings
+are equal, the first match found is processed.
+.P
+Scanning with all the patterns resumes immediately following the match, so that
+later matches on the same line can be found. Note, however, that an overlapping
+match that starts in the middle of another match will not be processed.
+.P
+The above behaviour was changed at release 10.41 to be more compatible with GNU
+grep. In earlier releases, \fBpcre2grep\fP did not recognize matches from
+later patterns that were earlier in the subject.
+.P
+Patterns that can match an empty string are accepted, but empty string
+matches are never recognized. An example is the pattern "(super)?(man)?", in
+which all components are optional. This pattern finds all occurrences of both
+"super" and "man"; the output differs from matching with "super|man" when only
+the matching substrings are being shown.
+.P
+If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set,
+\fBpcre2grep\fP uses the value to set a locale when calling the PCRE2 library.
+The \fB--locale\fP option can be used to override this.
+.
+.
+.SH "SUPPORT FOR COMPRESSED FILES"
+.rs
+.sp
+Compile-time options for \fBpcre2grep\fP can set it up to use \fBlibz\fP or
+\fBlibbz2\fP for reading compressed files whose names end in \fB.gz\fP or
+\fB.bz2\fP, respectively. You can find out whether your \fBpcre2grep\fP binary
+has support for one or both of these file types by running it with the
+\fB--help\fP option. If the appropriate support is not present, all files are
+treated as plain text. The standard input is always so treated. If a file with
+a \fB.gz\fP or \fB.bz2\fP extension is not in fact compressed, it is read as a
+plain text file. When input is from a compressed .gz or .bz2 file, the
+\fB--line-buffered\fP option is ignored.
+.
+.
+.SH "BINARY FILES"
+.rs
+.sp
+By default, a file that contains a binary zero byte within the first 1024 bytes
+is identified as a binary file, and is processed specially. However, if the
+newline type is specified as NUL, that is, the line terminator is a binary
+zero, the test for a binary file is not applied. See the \fB--binary-files\fP
+option for a means of changing the way binary files are handled.
+.
+.
+.SH "BINARY ZEROS IN PATTERNS"
+.rs
+.sp
+Patterns passed from the command line are strings that are terminated by a
+binary zero, so cannot contain internal zeros. However, patterns that are read
+from a file via the \fB-f\fP option may contain binary zeros.
+.
+.
+.SH OPTIONS
+.rs
+.sp
+The order in which some of the options appear can affect the output. For
+example, both the \fB-H\fP and \fB-l\fP options affect the printing of file
+names. Whichever comes later in the command line will be the one that takes
+effect. Similarly, except where noted below, if an option is given twice, the
+later setting is used. Numerical values for options may be followed by K or M,
+to signify multiplication by 1024 or 1024*1024 respectively.
+.TP 10
+\fB--\fP
+This terminates the list of options. It is useful if the next item on the
+command line starts with a hyphen but is not an option. This allows for the
+processing of patterns and file names that start with hyphens.
+.TP
+\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
+Output up to \fInumber\fP lines of context after each matching line. Fewer
+lines are output if the next match or the end of the file is reached, or if the
+processing buffer size has been set too small. If file names and/or line
+numbers are being output, a hyphen separator is used instead of a colon for the
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
+.TP
+\fB-a\fP, \fB--text\fP
+Treat binary files as text. This is equivalent to
+\fB--binary-files\fP=\fItext\fP.
+.TP
+\fB--allow-lookaround-bsk\fP
+PCRE2 now forbids the use of \eK in lookarounds by default, in line with Perl.
+This option causes \fBpcre2grep\fP to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+option, which enables this somewhat dangerous usage.
+.TP
+\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
+Output up to \fInumber\fP lines of context before each matching line. Fewer
+lines are output if the previous match or the start of the file is within
+\fInumber\fP lines, or if the processing buffer size has been set too small. If
+file names and/or line numbers are being output, a hyphen separator is used
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
+\fB-c\fP is used, \fB-B\fP is ignored.
+.TP
+\fB--binary-files=\fP\fIword\fP
+Specify how binary files are to be processed. If the word is "binary" (the
+default), pattern matching is performed on binary files, but the only output is
+"Binary file <name> matches" when a match succeeds. If the word is "text",
+which is equivalent to the \fB-a\fP or \fB--text\fP option, binary files are
+processed in the same way as any other file. In this case, when a match
+succeeds, the output may be binary garbage, which can have nasty effects if
+sent to a terminal. If the word is "without-match", which is equivalent to the
+\fB-I\fP option, binary files are not processed at all; they are assumed not to
+be of interest and are skipped without causing any output or affecting the
+return code.
+.TP
+\fB--buffer-size=\fP\fInumber\fP
+Set the parameter that controls how much memory is obtained at the start of
+processing for buffering files that are being scanned. See also
+\fB--max-buffer-size\fP below.
+.TP
+\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
+Output \fInumber\fP lines of context both before and after each matching line.
+This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value.
+.TP
+\fB-c\fP, \fB--count\fP
+Do not output lines from the files that are being scanned; instead output the
+number of lines that would have been shown, either because they matched, or, if
+\fB-v\fP is set, because they failed to match. By default, this count is
+exactly the same as the number of lines that would have been output, but if the
+\fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more
+suppressed lines than the count (that is, the number of matches).
+.sp
+If no lines are selected, the number zero is output. If several files are are
+being scanned, a count is output for each of them and the \fB-t\fP option can
+be used to cause a total to be output at the end. However, if the
+\fB--files-with-matches\fP option is also used, only those files whose counts
+are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
+\fB-B\fP, and \fB-C\fP options are ignored.
+.TP
+\fB--colour\fP, \fB--color\fP
+If this option is given without any data, it is equivalent to "--colour=auto".
+If data is required, it must be given in the same shell item, separated by an
+equals sign.
+.TP
+\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP
+This option specifies under what circumstances the parts of a line that matched
+a pattern should be coloured in the output. It is ignored if
+\fB--file-offsets\fP, \fB--line-offsets\fP, or \fB--output\fP is set. By
+default, output is not coloured. The value for the \fB--colour\fP option (which
+is optional, see above) may be "never", "always", or "auto". In the latter
+case, colouring happens only if the standard output is connected to a terminal.
+More resources are used when colouring is enabled, because \fBpcre2grep\fP has
+to search for all possible matches in a line, not just one, in order to colour
+them all.
+.sp
+The colour that is used can be specified by setting one of the environment
+variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
+PCREGREP_COLOR, which are checked in that order. If none of these are set,
+\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value
+of the variable should be a string of two numbers, separated by a semicolon,
+except in the case of GREP_COLORS, which must start with "ms=" or "mt="
+followed by two semicolon-separated colours, terminated by the end of the
+string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
+ignored, and GREP_COLOR is checked.
+.sp
+If the string obtained from one of the above variables contains any characters
+other than semicolon or digits, the setting is ignored and the default colour
+is used. The string is copied directly into the control string for setting
+colour on a terminal, so it is your responsibility to ensure that the values
+make sense. If no relevant environment variable is set, the default is "1;31",
+which gives red.
+.TP
+\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
+If an input path is not a regular file or a directory, "action" specifies how
+it is to be processed. Valid values are "read" (the default) or "skip"
+(silently skip the path).
+.TP
+\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP
+If an input path is a directory, "action" specifies how it is to be processed.
+Valid values are "read" (the default in non-Windows environments, for
+compatibility with GNU grep), "recurse" (equivalent to the \fB-r\fP option), or
+"skip" (silently skip the path, the default in Windows environments). In the
+"read" case, directories are read as if they were ordinary files. In some
+operating systems the effect of reading a directory like this is an immediate
+end-of-file; in others it may provoke an error.
+.TP
+\fB--depth-limit\fP=\fInumber\fP
+See \fB--match-limit\fP below.
+.TP
+\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
+Specify a pattern to be matched. This option can be used multiple times in
+order to specify several patterns. It can also be used as a way of specifying a
+single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
+pattern is taken from the command line; all arguments are treated as file
+names. There is no limit to the number of patterns. They are applied to each
+line in the order in which they are defined.
+.sp
+If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first,
+followed by the patterns from the file(s), independent of the order in which
+these options are specified.
+.TP
+\fB--exclude\fP=\fIpattern\fP
+Files (but not directories) whose names match the pattern are skipped without
+being processed. This applies to all files, whether listed on the command line,
+obtained from \fB--file-list\fP, or by scanning a directory. The pattern is a
+PCRE2 regular expression, and is matched against the final component of the
+file name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do
+not apply to this pattern. The option may be given any number of times in order
+to specify multiple patterns. If a file name matches both an \fB--include\fP
+and an \fB--exclude\fP pattern, it is excluded. There is no short form for this
+option.
+.TP
+\fB--exclude-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--exclude\fP
+option. What constitutes a newline when reading the file is the operating
+system's default. The \fB--newline\fP option has no effect on this option. This
+option may be given more than once in order to specify a number of files to
+read.
+.TP
+\fB--exclude-dir\fP=\fIpattern\fP
+Directories whose names match the pattern are skipped without being processed,
+whatever the setting of the \fB--recursive\fP option. This applies to all
+directories, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a parent directory. The pattern is a PCRE2
+regular expression, and is matched against the final component of the directory
+name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not
+apply to this pattern. The option may be given any number of times in order to
+specify more than one pattern. If a directory matches both \fB--include-dir\fP
+and \fB--exclude-dir\fP, it is excluded. There is no short form for this
+option.
+.TP
+\fB-F\fP, \fB--fixed-strings\fP
+Interpret each data-matching pattern as a list of fixed strings, separated by
+newlines, instead of as a regular expression. What constitutes a newline for
+this purpose is controlled by the \fB--newline\fP option. The \fB-w\fP (match
+as a word) and \fB-x\fP (match whole line) options can be used with \fB-F\fP.
+They apply to each of the fixed strings. A line is selected if any of the fixed
+strings are found in it (subject to \fB-w\fP or \fB-x\fP, if present). This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the \fB--include\fP or
+\fB--exclude\fP options.
+.TP
+\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
+Read patterns from the file, one per line. As is the case with patterns on the
+command line, no delimiters should be used. What constitutes a newline when
+reading the file is the operating system's default interpretation of \en. The
+\fB--newline\fP option has no effect on this option. Trailing white space is
+removed from each line, and blank lines are ignored. An empty file contains no
+patterns and therefore matches nothing. Patterns read from a file in this way
+may contain binary zeros, which are treated as ordinary data characters.
+.sp
+If this option is given more than once, all the specified files are read. A
+data line is output if any of the patterns match it. A file name can be given
+as "-" to refer to the standard input. When \fB-f\fP is used, patterns
+specified on the command line using \fB-e\fP may also be present; they are
+matched before the file's patterns. However, no pattern is taken from the
+command line; all arguments are treated as the names of paths to be searched.
+.TP
+\fB--file-list\fP=\fIfilename\fP
+Read a list of files and/or directories that are to be scanned from the given
+file, one per line. What constitutes a newline when reading the file is the
+operating system's default. Trailing white space is removed from each line, and
+blank lines are ignored. These paths are processed before any that are listed
+on the command line. The file name can be given as "-" to refer to the standard
+input. If \fB--file\fP and \fB--file-list\fP are both specified as "-",
+patterns are read first. This is useful only when the standard input is a
+terminal, from which further lines (the list of files) can be read after an
+end-of-file indication. If this option is given more than once, all the
+specified files are read.
+.TP
+\fB--file-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as an
+offset from the start of the file and a length, separated by a comma. In this
+mode, \fB--colour\fP has no effect, and no context is shown. That is, the
+\fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is more than one
+match in a line, each of them is shown separately. This option is mutually
+exclusive with \fB--output\fP, \fB--line-offsets\fP, and \fB--only-matching\fP.
+.TP
+\fB-H\fP, \fB--with-filename\fP
+Force the inclusion of the file name at the start of output lines when
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+.TP
+\fB-h\fP, \fB--no-filename\fP
+Suppress the output file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+.TP
+\fB--heap-limit\fP=\fInumber\fP
+See \fB--match-limit\fP below.
+.TP
+\fB--help\fP
+Output a help message, giving brief details of the command options and file
+type support, and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-I\fP
+Ignore binary files. This is equivalent to
+\fB--binary-files\fP=\fIwithout-match\fP.
+.TP
+\fB-i\fP, \fB--ignore-case\fP
+Ignore upper/lower case distinctions during comparisons.
+.TP
+\fB--include\fP=\fIpattern\fP
+If any \fB--include\fP patterns are specified, the only files that are
+processed are those whose names match one of the patterns and do not match an
+\fB--exclude\fP pattern. This option does not affect directories, but it
+applies to all files, whether listed on the command line, obtained from
+\fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular
+expression, and is matched against the final component of the file name, not
+the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not apply to
+this pattern. The option may be given any number of times. If a file name
+matches both an \fB--include\fP and an \fB--exclude\fP pattern, it is excluded.
+There is no short form for this option.
+.TP
+\fB--include-from=\fP\fIfilename\fP
+Treat each non-empty line of the file as the data for an \fB--include\fP
+option. What constitutes a newline for this purpose is the operating system's
+default. The \fB--newline\fP option has no effect on this option. This option
+may be given any number of times; all the files are read.
+.TP
+\fB--include-dir\fP=\fIpattern\fP
+If any \fB--include-dir\fP patterns are specified, the only directories that
+are processed are those whose names match one of the patterns and do not match
+an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed
+on the command line, obtained from \fB--file-list\fP, or by scanning a parent
+directory. The pattern is a PCRE2 regular expression, and is matched against
+the final component of the directory name, not the entire path. The \fB-F\fP,
+\fB-w\fP, and \fB-x\fP options do not apply to this pattern. The option may be
+given any number of times. If a directory matches both \fB--include-dir\fP and
+\fB--exclude-dir\fP, it is excluded. There is no short form for this option.
+.TP
+\fB-L\fP, \fB--files-without-match\fP
+Instead of outputting lines from the files, just output the names of the files
+that do not contain any lines that would have been output. Each file name is
+output once, on a separate line by default, but if the \fB-Z\fP option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
+.TP
+\fB-l\fP, \fB--files-with-matches\fP
+Instead of outputting lines from the files, just output the names of the files
+containing lines that would have been output. Each file name is output once, on
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
+occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
+\fB-h\fP, or \fB-L\fP options.
+.TP
+\fB--label\fP=\fIname\fP
+This option supplies a name to be used for the standard input when file names
+are being output. If not supplied, "(standard input)" is used. There is no
+short form for this option.
+.TP
+\fB--line-buffered\fP
+When this option is given, non-compressed input is read and processed line by
+line, and the output is flushed after each write. By default, input is read in
+large chunks, unless \fBpcre2grep\fP can determine that it is reading from a
+terminal, which is currently possible only in Unix-like environments or
+Windows. Output to terminal is normally automatically flushed by the operating
+system. This option can be useful when the input or output is attached to a
+pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data.
+However, its use will affect performance, and the \fB-M\fP (multiline) option
+ceases to work. When input is from a compressed .gz or .bz2 file,
+\fB--line-buffered\fP is ignored.
+.TP
+\fB--line-offsets\fP
+Instead of showing lines or parts of lines that match, show each match as a
+line number, the offset from the start of the line, and a length. The line
+number is terminated by a colon (as usual; see the \fB-n\fP option), and the
+offset and length are separated by a comma. In this mode, \fB--colour\fP has no
+effect, and no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with \fB--output\fP,
+\fB--file-offsets\fP, and \fB--only-matching\fP.
+.TP
+\fB--locale\fP=\fIlocale-name\fP
+This option specifies a locale to be used for pattern matching. It overrides
+the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
+locale is specified, the PCRE2 library's default (usually the "C" locale) is
+used. There is no short form for this option.
+.TP
+\fB-M\fP, \fB--multiline\fP
+Allow patterns to match more than one line. When this option is set, the PCRE2
+library is called in "multiline" mode. This allows a matched string to extend
+past the end of a line and continue on one or more subsequent lines. Patterns
+used with \fB-M\fP may usefully contain literal newline characters and internal
+occurrences of ^ and $ characters. The output for a successful match may
+consist of more than one line. The first line is the line in which the match
+started, and the last line is the line in which the match ended. If the matched
+string ends with a newline sequence, the output ends at the end of that line.
+If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
+match has been handled, scanning restarts at the beginning of the line after
+the one in which the match ended.
+.sp
+The newline sequence that separates multiple lines must be matched as part of
+the pattern. For example, to find the phrase "regular expression" in a file
+where "regular" might be at the end of a line and "expression" at the start of
+the next line, you could use this command:
+.sp
+  pcre2grep -M 'regular\es+expression' <file>
+.sp
+The \es escape sequence matches any white space character, including newlines,
+and is followed by + so as to match trailing white space on the first line as
+well as possibly handling a two-character newline sequence.
+.sp
+There is a limit to the number of lines that can be matched, imposed by the way
+that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
+large processing buffer, this should not be a problem, but the \fB-M\fP option
+does not work when input is read line by line (see \fB--line-buffered\fP.)
+.TP
+\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
+Stop processing after finding \fInumber\fP matching lines, or non-matching
+lines if \fB-v\fP is also set. Any trailing context lines are output after the
+final match. In multiline mode, each multiline match counts as just one line
+for this purpose. If this limit is reached when reading the standard input from
+a regular file, the file is left positioned just after the last matching line.
+If \fB-c\fP is also set, the count that is output is never greater than
+\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
+\fB-q\fP, or when just checking for a match in a binary file.
+.TP
+\fB--match-limit\fP=\fInumber\fP
+Processing some regular expression patterns may take a very long time to search
+for all possible matching strings. Others may require a very large amount of
+memory. There are three options that set resource limits for matching.
+.sp
+The \fB--match-limit\fP option provides a means of limiting computing resource
+usage when processing patterns that are not going to match, but which have a
+very large number of possibilities in their search trees. The classic example
+is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
+counter that is incremented each time around its main processing loop. If the
+value set by \fB--match-limit\fP is reached, an error occurs.
+.sp
+The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
+1024 bytes), the maximum amount of heap memory that may be used for matching.
+.sp
+The \fB--depth-limit\fP option limits the depth of nested backtracking points,
+which indirectly limits the amount of memory that is used. The amount of memory
+needed for each backtracking point depends on the number of capturing
+parentheses in the pattern, so the amount of memory that is used before this
+limit acts varies from pattern to pattern. This limit is of use only if it is
+set smaller than \fB--match-limit\fP.
+.sp
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
+.TP
+\fB--max-buffer-size\fP=\fInumber\fP
+This limits the expansion of the processing buffer, whose initial size can be
+set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
+smaller than the starting buffer size.
+.TP
+\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+.sp
+  pcre2grep -N CRLF 'some pattern' <file>
+.sp
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
+.sp
+When the PCRE2 library is built, a default line-ending sequence is specified.
+This is normally the standard sequence for the operating system. Unless
+otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
+.sp
+This option makes it possible to use \fBpcre2grep\fP to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, \fBpcre2grep\fP may behave in strange ways. Note that this option does
+not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options, which are expected to use the operating system's
+standard newline sequence.
+.TP
+\fB-n\fP, \fB--line-number\fP
+Precede each output line by its line number in the file, followed by a colon
+for matching lines or a hyphen for context lines. If the file name is also
+being output, it precedes the line number. When the \fB-M\fP option causes a
+pattern to match more than one line, only the first is preceded by its line
+number. This option is forced if \fB--line-offsets\fP is used.
+.TP
+\fB--no-jit\fP
+If the PCRE2 library is built with support for just-in-time compiling (which
+speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
+\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
+When there is a match, instead of outputting the line that matched, output just
+the text specified in this option, followed by an operating-system standard
+newline. In this mode, \fB--colour\fP has no effect, and no context is shown.
+That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. The
+\fB--newline\fP option has no effect on this option, which is mutually
+exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
+\fB--line-offsets\fP. However, like \fB--only-matching\fP, if there is more
+than one match in a line, each of them causes a line of output.
+.sp
+Escape sequences starting with a dollar character may be used to insert the
+contents of the matched part of the line and/or captured substrings into the
+text.
+.sp
+$<digits> or ${<digits>} is replaced by the captured substring of the given
+decimal number; zero substitutes the whole match. If the number is greater than
+the number of capturing substrings, or if the capture is unset, the replacement
+is empty.
+.sp
+$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by
+newline; $r by carriage return; $t by tab; $v by vertical tab.
+.sp
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the
+given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the
+second form must be used.
+.sp
+$x<digits> or $x{<digits>} is replaced by the character represented by the
+given hexadecimal number. In the first form, up to two hexadecimal digits are
+processed. When more digits are needed in Unicode mode to specify a wide
+character, the second form must be used.
+.sp
+Any other character is substituted by itself. In particular, $$ is replaced by
+a single dollar.
+.TP
+\fB-o\fP, \fB--only-matching\fP
+Show only the part of the line that matched a pattern instead of the whole
+line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
+\fB-C\fP options are ignored. If there is more than one match in a line, each
+of them is shown separately, on a separate line of output. If \fB-o\fP is
+combined with \fB-v\fP (invert the sense of the match to find non-matching
+lines), no output is generated, but the return code is set appropriately. If
+the matched portion of the line is empty, nothing is output unless the file
+name or line number are being printed, in which case they are shown on an
+otherwise empty line. This option is mutually exclusive with \fB--output\fP,
+\fB--file-offsets\fP and \fB--line-offsets\fP.
+.TP
+\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
+Show only the part of the line that matched the capturing parentheses of the
+given number. Up to 50 capturing parentheses are supported by default. This
+limit can be changed via the \fB--om-capture\fP option. A pattern may contain
+any number of capturing parentheses, but only those whose number is within the
+limit can be accessed by \fB-o\fP. An error occurs if the number specified by
+\fB-o\fP is greater than the limit.
+.sp
+-o0 is the same as \fB-o\fP without a number. Because these options can be
+given without an argument (see above), if an argument is present, it must be
+given in the same shell item, for example, -o3 or --only-matching=2. The
+comments given for the non-argument case above also apply to this option. If
+the specified capturing parentheses do not exist in the pattern, or were not
+set in the match, nothing is output unless the file name or line number are
+being output.
+.sp
+If this option is given multiple times, multiple substrings are output for each
+match, in the order the options are given, and all on one line. For example,
+-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
+then 3 again to be output. By default, there is no separator (but see the next
+but one option).
+.TP
+\fB--om-capture\fP=\fInumber\fP
+Set the number of capturing parentheses that can be accessed by \fB-o\fP. The
+default is 50.
+.TP
+\fB--om-separator\fP=\fItext\fP
+Specify a separating string for multiple occurrences of \fB-o\fP. The default
+is an empty string. Separating strings are never coloured.
+.TP
+\fB-q\fP, \fB--quiet\fP
+Work quietly, that is, display nothing except error messages. The exit
+status indicates whether or not any matches were found.
+.TP
+\fB-r\fP, \fB--recursive\fP
+If any given path is a directory, recursively scan the files it contains,
+taking note of any \fB--include\fP and \fB--exclude\fP settings. By default, a
+directory is read as a normal file; in some operating systems this gives an
+immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
+option to "recurse".
+.TP
+\fB--recursion-limit\fP=\fInumber\fP
+This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP
+above for details.
+.TP
+\fB-s\fP, \fB--no-messages\fP
+Suppress error messages about non-existent or unreadable files. Such files are
+quietly skipped. However, the return code is still 2, even if matches were
+found in other files.
+.TP
+\fB-t\fP, \fB--total-count\fP
+This option is useful when scanning more than one file. If used on its own,
+\fB-t\fP suppresses all output except for a grand total number of matching
+lines (or non-matching lines if \fB-v\fP is used) in all the files. If \fB-t\fP
+is used with \fB-c\fP, a grand total is output except when the previous output
+is just one line. In other words, it is not output when just one file's count
+is listed. If file names are being output, the grand total is preceded by
+"TOTAL:". Otherwise, it appears as just another number. The \fB-t\fP option is
+ignored when used with \fB-L\fP (list files without matches), because the grand
+total would always be zero.
+.TP
+\fB-u\fP, \fB--utf\fP
+Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
+with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
+\fB--include\fP options) and all lines that are scanned must be valid strings
+of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
+occurs.
+.TP
+\fB-U\fP, \fB--utf-allow-invalid\fP
+As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
+unit sequences. These can never form part of any pattern match. Patterns
+themselves, however, must still be valid UTF-8 strings. This facility allows
+valid UTF-8 strings to be sought within arbitrary byte sequences in executable
+or other binary files. For more details about matching in non-valid UTF-8
+strings, see the
+.\" HREF
+\fBpcre2unicode\fP(3)
+.\"
+documentation.
+.TP
+\fB-V\fP, \fB--version\fP
+Write the version numbers of \fBpcre2grep\fP and the PCRE2 library to the
+standard output and then exit. Anything else on the command line is
+ignored.
+.TP
+\fB-v\fP, \fB--invert-match\fP
+Invert the sense of the match, so that lines which do \fInot\fP match any of
+the patterns are the ones that are found. When this option is set, options such
+as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
+that are to be output, are ignored.
+.TP
+\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
+Force the patterns only to match "words". That is, there must be a word
+boundary at the start and end of each matched string. This is equivalent to
+having "\eb(?:" at the start of each pattern, and ")\eb" at the end. This
+option applies only to the patterns that are matched against the contents of
+files; it does not apply to patterns specified by any of the \fB--include\fP or
+\fB--exclude\fP options.
+.TP
+\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
+Force the patterns to start matching only at the beginnings of lines, and in
+addition, require them to match entire lines. In multiline mode the match may
+be more than one line. This is equivalent to having "^(?:" at the start of each
+pattern and ")$" at the end. This option applies only to the patterns that are
+matched against the contents of files; it does not apply to patterns specified
+by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate files names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+.
+.
+.SH "ENVIRONMENT VARIABLES"
+.rs
+.sp
+The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that
+order, for a locale. The first one that is set is used. This can be overridden
+by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
+(usually the "C" locale) is used.
+.
+.
+.SH "NEWLINES"
+.rs
+.sp
+The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options.
+.P
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single NL is used.
+.P
+The newline setting does not affect the way in which \fBpcre2grep\fP writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\er\en" at the
+ends of output lines that are copied from the input is not converted to
+"\er\er\en" by the C I/O library. This means that any messages written to the
+standard output must end with "\er\en". For all other operating systems, and
+for all messages to the standard error stream, "\en" is used.
+.
+.
+.SH "OPTIONS COMPATIBILITY"
+.rs
+.sp
+Many of the short and long forms of \fBpcre2grep\fP's options are the same
+as in the GNU \fBgrep\fP program. Any long option of the form
+\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
+(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
+\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
+\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
+\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
+\fB--output\fP, \fB-u\fP, \fB--utf\fP, \fB-U\fP, and \fB--utf-allow-invalid\fP
+options are specific to \fBpcre2grep\fP, as is the use of the
+\fB--only-matching\fP option with a capturing parentheses number.
+.P
+Although most of the common options work the same way, a few are different in
+\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
+for GNU \fBgrep\fP, but a regular expression for \fBpcre2grep\fP. If both the
+\fB-c\fP and \fB-l\fP options are given, GNU grep lists only file names,
+without counts, but \fBpcre2grep\fP gives the counts as well.
+.
+.
+.SH "OPTIONS WITH DATA"
+.rs
+.sp
+There are four different ways in which an option with data can be specified.
+If a short form option is used, the data may follow immediately, or (with one
+exception) in the next command line item. For example:
+.sp
+  -f/some/file
+  -f /some/file
+.sp
+The exception is the \fB-o\fP option, which may appear with or without data.
+Because of this, if data is present, it must follow immediately in the same
+item, for example -o3.
+.P
+If a long form option is used, the data may appear in the same command line
+item, separated by an equals character, or (with two exceptions) it may appear
+in the next command line item. For example:
+.sp
+  --file=/some/file
+  --file /some/file
+.sp
+Note, however, that if you want to supply a file name beginning with ~ as data
+in a shell command, and have the shell expand ~ to a home directory, you must
+separate the file name from the option, because the shell does not treat ~
+specially unless it is at the start of an item.
+.P
+The exceptions to the above are the \fB--colour\fP (or \fB--color\fP) and
+\fB--only-matching\fP options, for which the data is optional. If one of these
+options does have data, it must be given in the first form, using an equals
+character. Otherwise \fBpcre2grep\fP will assume that it has no data.
+.
+.
+.SH "USING PCRE2'S CALLOUT FACILITY"
+.rs
+.sp
+\fBpcre2grep\fP has, by default, support for calling external programs or
+scripts or echoing specific strings during matching by making use of PCRE2's
+callout facility. However, this support can be completely or partially disabled
+when \fBpcre2grep\fP is built. You can find out whether your binary has support
+for callouts by running it with the \fB--help\fP option. If callout support is
+completely disabled, all callouts in patterns are ignored by \fBpcre2grep\fP.
+If the facility is partially disabled, calling external programs is not
+supported, and callouts that request it are ignored.
+.P
+A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
+either a number or a quoted string (see the
+.\" HREF
+\fBpcre2callout\fP
+.\"
+documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP;
+only callouts with string arguments are useful.
+.
+.
+.SS "Echoing a specific string"
+.rs
+.sp
+Starting the callout string with a pipe character invokes an echoing facility
+that avoids calling an external program or script. This facility is always
+available, provided that callouts were not completely disabled when
+\fBpcre2grep\fP was built. The rest of the callout string is processed as a
+zero-terminated string, which means it should not contain any internal binary
+zeros. It is written to the output, having first been passed through the same
+escape processing as text from the \fB--output\fP (\fB-O\fP) option (see
+above). However, $0 cannot be used to insert a matched substring because the
+match is still in progress. Instead, the single character '0' is inserted. Any
+syntax errors in the string (for example, a dollar not followed by another
+character) causes the callout to be ignored. No terminator is added to the
+output string, so if you want a newline, you must include it explicitly using
+the escape $n. For example:
+.sp
+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+.sp
+Matching continues normally after the string is output. If you want to see only
+the callout output but not any output from an actual match, you should end the
+pattern with (*FAIL).
+.
+.
+.SS "Calling external programs or scripts"
+.rs
+.sp
+This facility can be independently disabled when \fBpcre2grep\fP is built. It
+is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
+where \fBlib$spawn()\fP is used, and for any Unix-like environment where
+\fBfork()\fP and \fBexecv()\fP are available.
+.P
+If the callout string does not start with a pipe (vertical bar) character, it
+is parsed into a list of substrings separated by pipe characters. The first
+substring must be an executable name, with the following substrings specifying
+arguments:
+.sp
+  executable_name|arg1|arg2|...
+.sp
+Any substring (including the executable name) may contain escape sequences
+started by a dollar character. These are the same as for the \fB--output\fP
+(\fB-O\fP) option documented above, except that $0 cannot insert the matched
+string because the match is still in progress. Instead, the character '0'
+is inserted. If you need a literal dollar or pipe character in any
+substring, use $$ or $| respectively. Here is an example:
+.sp
+  echo -e "abcde\en12345" | pcre2grep \e
+    '(?x)(.)(..(.))
+    (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
+.sp
+  Output:
+.sp
+    Arg1: [a] [bcd] [d] Arg2: |a| ()
+    abcde
+    Arg1: [1] [234] [4] Arg2: |1| ()
+    12345
+.sp
+The parameters for the system call that is used to run the program or script
+are zero-terminated strings. This means that binary zero characters in the
+callout argument will cause premature termination of their substrings, and
+therefore should not be present. Any syntax errors in the string (for example,
+a dollar not followed by another character) causes the callout to be ignored.
+If running the program fails for any reason (including the non-existence of the
+executable), a local matching failure occurs and the matcher backtracks in the
+normal way.
+.
+.
+.SH "MATCHING ERRORS"
+.rs
+.sp
+It is possible to supply a regular expression that takes a very long time to
+fail to match certain lines. Such patterns normally involve nested indefinite
+repeats, for example: (a+)*\ed when matched against a line of a's with no final
+digit. The PCRE2 matching function has a resource limit that causes it to abort
+in these circumstances. If this happens, \fBpcre2grep\fP outputs an error
+message and the line that caused the problem to the standard error stream. If
+there are more than 20 such errors, \fBpcre2grep\fP gives up.
+.P
+The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
+overall resource limit. There are also other limits that affect the amount of
+memory used during matching; see the discussion of \fB--heap-limit\fP and
+\fB--depth-limit\fP above.
+.
+.
+.SH DIAGNOSTICS
+.rs
+.sp
+Exit status is 0 if any matches were found, 1 if no matches were found, and 2
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+\fB-s\fP option to suppress error messages about inaccessible files does not
+affect the return code.
+.P
+When run under VMS, the return code is placed in the symbol PCRE2GREP_RC
+because VMS does not distinguish between exit(0) and exit(1).
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3),
+\fBpcre2unicode\fP(3).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+Retired from University Computing Service
+Cambridge, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 21 November 2022
+Copyright (c) 1997-2022 University of Cambridge.
+.fi
--- a/third_party/pcre/pcre2grep.c
+++ b/third_party/pcre/pcre2grep.c
--- a/third_party/pcre/pcre2posix.3
+++ b/third_party/pcre/pcre2posix.3
@ -0,0 +1,329 @@
+.TH PCRE2POSIX 3 "26 April 2021" "PCRE2 10.37"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH "SYNOPSIS"
+.rs
+.sp
+.B #include <pcre2posix.h>
+.PP
+.nf
+.B int pcre2_regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
+.B "     int \fIcflags\fP);"
+.sp
+.B int pcre2_regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP,
+.B "     size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);"
+.sp
+.B "size_t pcre2_regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,"
+.B "     char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);"
+.sp
+.B void pcre2_regfree(regex_t *\fIpreg\fP);
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This set of functions provides a POSIX-style API for the PCRE2 regular
+expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
+and 32-bit libraries. See the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation for a description of PCRE2's native API, which contains much
+additional functionality.
+.P
+The functions described here are wrapper functions that ultimately call the
+PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
+file, and they all have unique names starting with \fBpcre2_\fP. However, the
+\fBpcre2posix.h\fP header also contains macro definitions that convert the
+standard POSIX names such \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
+means that a program can use the usual POSIX names without running the risk of
+accidentally linking with POSIX functions from a different library.
+.P
+On Unix-like systems the PCRE2 POSIX library is called \fBlibpcre2-posix\fP, so
+can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
+application. Because the POSIX functions call the native ones, it is also
+necessary to add \fB-lpcre2-8\fP.
+.P
+Although they were not defined as protypes in \fBpcre2posix.h\fP, releases
+10.33 to 10.36 of the library contained functions with the POSIX names
+\fBregcomp()\fP etc. These simply passed their arguments to the PCRE2
+functions. These functions were provided for backwards compatibility with
+earlier versions of PCRE2, which had only POSIX names. However, this has proved
+troublesome in situations where a program links with several libraries, some of
+which use PCRE2's POSIX interface while others use the real POSIX functions.
+For this reason, the POSIX names have been removed since release 10.37.
+.P
+Calling the header file \fBpcre2posix.h\fP avoids any conflict with other POSIX
+libraries. It can, of course, be renamed or aliased as \fBregex.h\fP, which is
+the "correct" name, if there is no clash. It provides two structure types,
+\fIregex_t\fP for compiled internal forms, and \fIregmatch_t\fP for returning
+captured substrings. It also defines some constants whose names start with
+"REG_"; these are used for setting options and identifying error codes.
+.
+.
+.SH "USING THE POSIX FUNCTIONS"
+.rs
+.sp
+Those POSIX option bits that can reasonably be mapped to PCRE2 native options
+have been implemented. In addition, the option REG_EXTENDED is defined with the
+value zero. This has no effect, but since programs that are written to the
+POSIX interface often use it, this makes it easier to slot in PCRE2 as a
+replacement library. Other POSIX options are not even defined.
+.P
+There are also some options that are not defined by POSIX. These have been
+added at the request of users who want to make use of certain PCRE2-specific
+features via the POSIX calling interface or to add BSD or GNU functionality.
+.P
+When PCRE2 is called via these functions, it is only the API that is POSIX-like
+in style. The syntax and semantics of the regular expressions themselves are
+still those of Perl, subject to the setting of various PCRE2 options, as
+described below. "POSIX-like in style" means that the API approximates to the
+POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding
+domains it is probably even less compatible.
+.P
+The descriptions below use the actual names of the functions, but, as described
+above, the standard POSIX names (without the \fBpcre2_\fP prefix) may also be
+used.
+.
+.
+.SH "COMPILING A PATTERN"
+.rs
+.sp
+The function \fBpcre2_regcomp()\fP is called to compile a pattern into an
+internal form. By default, the pattern is a C string terminated by a binary
+zero (but see REG_PEND below). The \fIpreg\fP argument is a pointer to a
+\fBregex_t\fP structure that is used as a base for storing information about
+the compiled regular expression. (It is also used for input when REG_PEND is
+set.)
+.P
+The argument \fIcflags\fP is either zero, or contains one or more of the bits
+defined by the following macros:
+.sp
+  REG_DOTALL
+.sp
+The PCRE2_DOTALL option is set when the regular expression is passed for
+compilation to the native function. Note that REG_DOTALL is not part of the
+POSIX standard.
+.sp
+  REG_ICASE
+.sp
+The PCRE2_CASELESS option is set when the regular expression is passed for
+compilation to the native function.
+.sp
+  REG_NEWLINE
+.sp
+The PCRE2_MULTILINE option is set when the regular expression is passed for
+compilation to the native function. Note that this does \fInot\fP mimic the
+defined POSIX behaviour for REG_NEWLINE (see the following section).
+.sp
+  REG_NOSPEC
+.sp
+The PCRE2_LITERAL option is set when the regular expression is passed for
+compilation to the native function. This disables all meta characters in the
+pattern, causing it to be treated as a literal string. The only other options
+that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and
+REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
+.sp
+  REG_NOSUB
+.sp
+When a pattern that is compiled with this flag is passed to
+\fBpcre2_regexec()\fP for matching, the \fInmatch\fP and \fIpmatch\fP arguments
+are ignored, and no captured strings are returned. Versions of the PCRE library
+prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this
+no longer happens because it disables the use of backreferences.
+.sp
+  REG_PEND
+.sp
+If this option is set, the \fBreg_endp\fP field in the \fIpreg\fP structure
+(which has the type const char *) must be set to point to the character beyond
+the end of the pattern before calling \fBpcre2_regcomp()\fP. The pattern itself
+may now contain binary zeros, which are treated as data characters. Without
+REG_PEND, a binary zero terminates the pattern and the \fBre_endp\fP field is
+ignored. This is a GNU extension to the POSIX standard and should be used with
+caution in software intended to be portable to other systems.
+.sp
+  REG_UCP
+.sp
+The PCRE2_UCP option is set when the regular expression is passed for
+compilation to the native function. This causes PCRE2 to use Unicode properties
+when matchine \ed, \ew, etc., instead of just recognizing ASCII values. Note
+that REG_UCP is not part of the POSIX standard.
+.sp
+  REG_UNGREEDY
+.sp
+The PCRE2_UNGREEDY option is set when the regular expression is passed for
+compilation to the native function. Note that REG_UNGREEDY is not part of the
+POSIX standard.
+.sp
+  REG_UTF
+.sp
+The PCRE2_UTF option is set when the regular expression is passed for
+compilation to the native function. This causes the pattern itself and all data
+strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF
+is not part of the POSIX standard.
+.P
+In the absence of these flags, no options are passed to the native function.
+This means the the regex is compiled with PCRE2 default semantics. In
+particular, the way it handles newline characters in the subject string is the
+Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only
+\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way
+newlines are matched by the dot metacharacter (they are not) or by a negative
+class such as [^a] (they are).
+.P
+The yield of \fBpcre2_regcomp()\fP is zero on success, and non-zero otherwise.
+The \fIpreg\fP structure is filled in on success, and one other member of the
+structure (as well as \fIre_endp\fP) is public: \fIre_nsub\fP contains the
+number of capturing subpatterns in the regular expression. Various error codes
+are defined in the header file.
+.P
+NOTE: If the yield of \fBpcre2_regcomp()\fP is non-zero, you must not attempt
+to use the contents of the \fIpreg\fP structure. If, for example, you pass it
+to \fBpcre2_regexec()\fP, the result is undefined and your program is likely to
+crash.
+.
+.
+.SH "MATCHING NEWLINE CHARACTERS"
+.rs
+.sp
+This area is not simple, because POSIX and Perl take different views of things.
+It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was
+never intended to be a POSIX engine. The following table lists the different
+possibilities for matching newline characters in Perl and PCRE2:
+.sp
+                          Default   Change with
+.sp
+  . matches newline          no     PCRE2_DOTALL
+  newline matches [^a]       yes    not changeable
+  $ matches \en at end        yes    PCRE2_DOLLAR_ENDONLY
+  $ matches \en in middle     no     PCRE2_MULTILINE
+  ^ matches \en in middle     no     PCRE2_MULTILINE
+.sp
+This is the equivalent table for a POSIX-compatible pattern matcher:
+.sp
+                          Default   Change with
+.sp
+  . matches newline          yes    REG_NEWLINE
+  newline matches [^a]       yes    REG_NEWLINE
+  $ matches \en at end        no     REG_NEWLINE
+  $ matches \en in middle     no     REG_NEWLINE
+  ^ matches \en in middle     no     REG_NEWLINE
+.sp
+This behaviour is not what happens when PCRE2 is called via its POSIX
+API. By default, PCRE2's behaviour is the same as Perl's, except that there is
+no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there
+is no way to stop newline from matching [^a].
+.P
+Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and
+PCRE2_DOLLAR_ENDONLY when calling \fBpcre2_compile()\fP directly, but there is
+no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using
+the POSIX API, passing REG_NEWLINE to PCRE2's \fBpcre2_regcomp()\fP function
+causes PCRE2_MULTILINE to be passed to \fBpcre2_compile()\fP, and REG_DOTALL
+passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY.
+.
+.
+.SH "MATCHING A PATTERN"
+.rs
+.sp
+The function \fBpcre2_regexec()\fP is called to match a compiled pattern
+\fIpreg\fP against a given \fIstring\fP, which is by default terminated by a
+zero byte (but see REG_STARTEND below), subject to the options in \fIeflags\fP.
+These can be:
+.sp
+  REG_NOTBOL
+.sp
+The PCRE2_NOTBOL option is set when calling the underlying PCRE2 matching
+function.
+.sp
+  REG_NOTEMPTY
+.sp
+The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 matching
+function. Note that REG_NOTEMPTY is not part of the POSIX standard. However,
+setting this option can give more POSIX-like behaviour in some situations.
+.sp
+  REG_NOTEOL
+.sp
+The PCRE2_NOTEOL option is set when calling the underlying PCRE2 matching
+function.
+.sp
+  REG_STARTEND
+.sp
+When this option is set, the subject string starts at \fIstring\fP +
+\fIpmatch[0].rm_so\fP and ends at \fIstring\fP + \fIpmatch[0].rm_eo\fP, which
+should point to the first character beyond the string. There may be binary
+zeros within the subject string, and indeed, using REG_STARTEND is the only
+way to pass a subject string that contains a binary zero.
+.P
+Whatever the value of \fIpmatch[0].rm_so\fP, the offsets of the matched string
+and any captured substrings are still given relative to the start of
+\fIstring\fP itself. (Before PCRE2 release 10.30 these were given relative to
+\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other
+implementations.)
+.P
+This is a BSD extension, compatible with but not specified by IEEE Standard
+1003.2 (POSIX.2), and should be used with caution in software intended to be
+portable to other systems. Note that a non-zero \fIrm_so\fP does not imply
+REG_NOTBOL; REG_STARTEND affects only the location and length of the string,
+not how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL
+are mutually exclusive; the error REG_INVARG is returned.
+.P
+If the pattern was compiled with the REG_NOSUB flag, no data about any matched
+strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
+\fBpcre2_regexec()\fP are ignored (except possibly as input for REG_STARTEND).
+.P
+The value of \fInmatch\fP may be zero, and the value \fIpmatch\fP may be NULL
+(unless REG_STARTEND is set); in both these cases no data about any matched
+strings is returned.
+.P
+Otherwise, the portion of the string that was matched, and also any captured
+substrings, are returned via the \fIpmatch\fP argument, which points to an
+array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the
+members \fIrm_so\fP and \fIrm_eo\fP. These contain the byte offset to the first
+character of each substring and the offset to the first character after the end
+of each substring, respectively. The 0th element of the vector relates to the
+entire portion of \fIstring\fP that was matched; subsequent elements relate to
+the capturing subpatterns of the regular expression. Unused entries in the
+array have both structure members set to -1.
+.P
+A successful match yields a zero return; various error codes are defined in the
+header file, of which REG_NOMATCH is the "expected" failure code.
+.
+.
+.SH "ERROR MESSAGES"
+.rs
+.sp
+The \fBpcre2_regerror()\fP function maps a non-zero errorcode from either
+\fBpcre2_regcomp()\fP or \fBpcre2_regexec()\fP to a printable message. If
+\fIpreg\fP is not NULL, the error should have arisen from the use of that
+structure. A message terminated by a binary zero is placed in \fIerrbuf\fP. If
+the buffer is too short, only the first \fIerrbuf_size\fP - 1 characters of the
+error message are used. The yield of the function is the size of buffer needed
+to hold the whole message, including the terminating zero. This value is
+greater than \fIerrbuf_size\fP if the message was truncated.
+.
+.
+.SH MEMORY USAGE
+.rs
+.sp
+Compiling a regular expression causes memory to be allocated and associated
+with the \fIpreg\fP structure. The function \fBpcre2_regfree()\fP frees all
+such memory, after which \fIpreg\fP may no longer be used as a compiled
+expression.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 26 April 2021
+Copyright (c) 1997-2021 University of Cambridge.
+.fi
--- a/third_party/pcre/pcre2posix.c
+++ b/third_party/pcre/pcre2posix.c
@ -0,0 +1,435 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module is a wrapper that provides a POSIX API to the underlying PCRE2
+functions. The operative functions are called pcre2_regcomp(), etc., with
+wrappers that use the plain POSIX names. In addition, pcre2posix.h defines the
+POSIX names as macros for the pcre2_xxx functions, so any program that includes
+it and uses the POSIX names will call the base functions directly. This makes
+it easier for an application to be sure it gets the PCRE2 versions in the
+presence of other POSIX regex libraries. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+/* Ensure that the PCRE2POSIX_EXP_xxx macros are set appropriately for
+compiling these functions. This must come before including pcre2posix.h, where
+they are set for an application (using these functions) if they have not
+previously been set. */
+
+#if defined(_WIN32) && !defined(PCRE2_STATIC)
+#  define PCRE2POSIX_EXP_DECL extern __declspec(dllexport)
+#  define PCRE2POSIX_EXP_DEFN __declspec(dllexport)
+#endif
+
+/* Older versions of MSVC lack snprintf(). This define allows for
+warning/error-free compilation and testing with MSVC compilers back to at least
+MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define snprintf _snprintf
+#endif
+
+
+/* Compile-time error numbers start at this value. It should probably never be
+changed. This #define is a copy of the one in pcre2_internal.h. */
+
+#define COMPILE_ERROR_BASE 100
+
+
+/* Standard C headers */
+
+#include <ctype.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* PCRE2 headers */
+
+#include "pcre2.h"
+#include "pcre2posix.h"
+
+/* Table to translate PCRE2 compile time error codes into POSIX error codes.
+Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
+codes: most go to REG_BADPAT. The second table lists, in pairs, those that
+don't. */
+
+static const int eint1[] = {
+  0,           /* No error */
+  REG_EESCAPE, /* \ at end of pattern */
+  REG_EESCAPE, /* \c at end of pattern */
+  REG_EESCAPE, /* unrecognized character follows \ */
+  REG_BADBR,   /* numbers out of order in {} quantifier */
+  /* 5 */
+  REG_BADBR,   /* number too big in {} quantifier */
+  REG_EBRACK,  /* missing terminating ] for character class */
+  REG_ECTYPE,  /* invalid escape sequence in character class */
+  REG_ERANGE,  /* range out of order in character class */
+  REG_BADRPT,  /* nothing to repeat */
+  /* 10 */
+  REG_ASSERT,  /* internal error: unexpected repeat */
+  REG_BADPAT,  /* unrecognized character after (? or (?- */
+  REG_BADPAT,  /* POSIX named classes are supported only within a class */
+  REG_BADPAT,  /* POSIX collating elements are not supported */
+  REG_EPAREN,  /* missing ) */
+  /* 15 */
+  REG_ESUBREG, /* reference to non-existent subpattern */
+  REG_INVARG,  /* pattern passed as NULL */
+  REG_INVARG,  /* unknown compile-time option bit(s) */
+  REG_EPAREN,  /* missing ) after (?# comment */
+  REG_ESIZE,   /* parentheses nested too deeply */
+  /* 20 */
+  REG_ESIZE,   /* regular expression too large */
+  REG_ESPACE,  /* failed to get memory */
+  REG_EPAREN,  /* unmatched closing parenthesis */
+  REG_ASSERT   /* internal error: code overflow */
+  };
+
+static const int eint2[] = {
+  30, REG_ECTYPE,  /* unknown POSIX class name */
+  32, REG_INVARG,  /* this version of PCRE2 does not have Unicode support */
+  37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
+  56, REG_INVARG,  /* internal error: unknown newline setting */
+  92, REG_INVARG,  /* invalid option bits with PCRE2_LITERAL */
+  99, REG_EESCAPE  /* \K in lookaround */
+};
+
+/* Table of texts corresponding to POSIX error codes */
+
+static const char *const pstring[] = {
+  "",                                /* Dummy for value 0 */
+  "internal error",                  /* REG_ASSERT */
+  "invalid repeat counts in {}",     /* BADBR      */
+  "pattern error",                   /* BADPAT     */
+  "? * + invalid",                   /* BADRPT     */
+  "unbalanced {}",                   /* EBRACE     */
+  "unbalanced []",                   /* EBRACK     */
+  "collation error - not relevant",  /* ECOLLATE   */
+  "bad class",                       /* ECTYPE     */
+  "bad escape sequence",             /* EESCAPE    */
+  "empty expression",                /* EMPTY      */
+  "unbalanced ()",                   /* EPAREN     */
+  "bad range inside []",             /* ERANGE     */
+  "expression too big",              /* ESIZE      */
+  "failed to get memory",            /* ESPACE     */
+  "bad back reference",              /* ESUBREG    */
+  "bad argument",                    /* INVARG     */
+  "match failed"                     /* NOMATCH    */
+};
+
+
+
+#if 0  /* REMOVE THIS CODE */
+
+The code below was created for 10.33 (see ChangeLog 10.33 #4) when the
+POSIX functions were given pcre2_... names instead of the traditional POSIX
+names. However, it has proved to be more troublesome than useful. There have
+been at least two cases where a program links with two others, one of which
+uses the POSIX library and the other uses the PCRE2 POSIX functions, thus
+causing two instances of the POSIX runctions to exist, leading to trouble. For
+10.37 this code is commented out. In due course it can be removed if there are
+no issues. The only small worry is the comment below about languages that do
+not include pcre2posix.h. If there are any such cases, they will have to use
+the PCRE2 names.
+
+
+/*************************************************
+*      Wrappers with traditional POSIX names     *
+*************************************************/
+
+/* Keep defining them to preseve the ABI for applications linked to the pcre2
+POSIX library before these names were changed into macros in pcre2posix.h.
+This also ensures that the POSIX names are callable from languages that do not
+include pcre2posix.h. It is vital to #undef the macro definitions from
+pcre2posix.h! */
+
+#undef regerror
+PCRE2POSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
+PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
+regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+return pcre2_regerror(errcode, preg, errbuf, errbuf_size);
+}
+
+#undef regfree
+PCRE2POSIX_EXP_DECL void regfree(regex_t *);
+PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
+regfree(regex_t *preg)
+{
+pcre2_regfree(preg);
+}
+
+#undef regcomp
+PCRE2POSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
+PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
+regcomp(regex_t *preg, const char *pattern, int cflags)
+{
+return pcre2_regcomp(preg, pattern, cflags);
+}
+
+#undef regexec
+PCRE2POSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
+  regmatch_t *, int);
+PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
+regexec(const regex_t *preg, const char *string, size_t nmatch,
+  regmatch_t pmatch[], int eflags)
+{
+return pcre2_regexec(preg, string, nmatch, pmatch, eflags);
+}
+#endif
+
+
+/*************************************************
+*          Translate error code to string        *
+*************************************************/
+
+PCRE2POSIX_EXP_DEFN size_t PCRE2_CALL_CONVENTION
+pcre2_regerror(int errcode, const regex_t *preg, char *errbuf,
+  size_t errbuf_size)
+{
+int used;
+const char *message;
+
+message = (errcode <= 0 || errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
+  "unknown error code" : pstring[errcode];
+
+if (preg != NULL && (int)preg->re_erroffset != -1)
+  {
+  used = snprintf(errbuf, errbuf_size, "%s at offset %-6d", message,
+    (int)preg->re_erroffset);
+  }
+else
+  {
+  used = snprintf(errbuf, errbuf_size, "%s", message);
+  }
+
+return used + 1;
+}
+
+
+
+/*************************************************
+*           Free store held by a regex           *
+*************************************************/
+
+PCRE2POSIX_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_regfree(regex_t *preg)
+{
+pcre2_match_data_free(preg->re_match_data);
+pcre2_code_free(preg->re_pcre2_code);
+}
+
+
+
+/*************************************************
+*            Compile a regular expression        *
+*************************************************/
+
+/*
+Arguments:
+  preg        points to a structure for recording the compiled expression
+  pattern     the pattern to compile
+  cflags      compilation flags
+
+Returns:      0 on success
+              various non-zero codes on failure
+*/
+
+PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_regcomp(regex_t *preg, const char *pattern, int cflags)
+{
+PCRE2_SIZE erroffset;
+PCRE2_SIZE patlen;
+int errorcode;
+int options = 0;
+int re_nsub = 0;
+
+patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
+  PCRE2_ZERO_TERMINATED;
+
+if ((cflags & REG_ICASE) != 0)    options |= PCRE2_CASELESS;
+if ((cflags & REG_NEWLINE) != 0)  options |= PCRE2_MULTILINE;
+if ((cflags & REG_DOTALL) != 0)   options |= PCRE2_DOTALL;
+if ((cflags & REG_NOSPEC) != 0)   options |= PCRE2_LITERAL;
+if ((cflags & REG_UTF) != 0)      options |= PCRE2_UTF;
+if ((cflags & REG_UCP) != 0)      options |= PCRE2_UCP;
+if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
+
+preg->re_cflags = cflags;
+preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options,
+  &errorcode, &erroffset, NULL);
+preg->re_erroffset = erroffset;
+
+if (preg->re_pcre2_code == NULL)
+  {
+  unsigned int i;
+
+  /* A negative value is a UTF error; otherwise all error codes are greater
+  than COMPILE_ERROR_BASE, but check, just in case. */
+
+  if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
+  errorcode -= COMPILE_ERROR_BASE;
+
+  if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
+    return eint1[errorcode];
+  for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
+    if (errorcode == eint2[i]) return eint2[i+1];
+  return REG_BADPAT;
+  }
+
+(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
+  PCRE2_INFO_CAPTURECOUNT, &re_nsub);
+preg->re_nsub = (size_t)re_nsub;
+preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
+preg->re_erroffset = (size_t)(-1);  /* No meaning after successful compile */
+
+if (preg->re_match_data == NULL)
+  {
+  /* LCOV_EXCL_START */
+  pcre2_code_free(preg->re_pcre2_code);
+  return REG_ESPACE;
+  /* LCOV_EXCL_STOP */
+  }
+
+return 0;
+}
+
+
+
+/*************************************************
+*              Match a regular expression        *
+*************************************************/
+
+/* A suitable match_data block, large enough to hold all possible captures, was
+obtained when the pattern was compiled, to save having to allocate and free it
+for each match. If REG_NOSUB was specified at compile time, the nmatch and
+pmatch arguments are ignored, and the only result is yes/no/error. */
+
+PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_regexec(const regex_t *preg, const char *string, size_t nmatch,
+  regmatch_t pmatch[], int eflags)
+{
+int rc, so, eo;
+int options = 0;
+pcre2_match_data *md = (pcre2_match_data *)preg->re_match_data;
+
+if (string == NULL) return REG_INVARG;
+
+if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
+if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
+if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
+
+/* When REG_NOSUB was specified, or if no vector has been passed in which to
+put captured strings, ensure that nmatch is zero. This will stop any attempt to
+write to pmatch. */
+
+if ((preg->re_cflags & REG_NOSUB) != 0 || pmatch == NULL) nmatch = 0;
+
+/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
+The man page from OS X says "REG_STARTEND affects only the location of the
+string, not how it is matched". That is why the "so" value is used to bump the
+start location rather than being passed as a PCRE2 "starting offset". */
+
+if ((eflags & REG_STARTEND) != 0)
+  {
+  if (pmatch == NULL) return REG_INVARG;
+  so = pmatch[0].rm_so;
+  eo = pmatch[0].rm_eo;
+  }
+else
+  {
+  so = 0;
+  eo = (int)strlen(string);
+  }
+
+rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
+  (PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
+
+/* Successful match */
+
+if (rc >= 0)
+  {
+  size_t i;
+  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
+  if ((size_t)rc > nmatch) rc = (int)nmatch;
+  for (i = 0; i < (size_t)rc; i++)
+    {
+    pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
+      (int)(ovector[i*2] + so);
+    pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
+      (int)(ovector[i*2+1] + so);
+    }
+  for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+  return 0;
+  }
+
+/* Unsuccessful match */
+
+if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21)
+  return REG_INVARG;
+
+/* Most of these are events that won't occur during testing, so exclude them
+from coverage. */
+
+switch(rc)
+  {
+  case PCRE2_ERROR_HEAPLIMIT: return REG_ESPACE;
+  case PCRE2_ERROR_NOMATCH: return REG_NOMATCH;
+
+  /* LCOV_EXCL_START */
+  case PCRE2_ERROR_BADMODE: return REG_INVARG;
+  case PCRE2_ERROR_BADMAGIC: return REG_INVARG;
+  case PCRE2_ERROR_BADOPTION: return REG_INVARG;
+  case PCRE2_ERROR_BADUTFOFFSET: return REG_INVARG;
+  case PCRE2_ERROR_MATCHLIMIT: return REG_ESPACE;
+  case PCRE2_ERROR_NOMEMORY: return REG_ESPACE;
+  case PCRE2_ERROR_NULL: return REG_INVARG;
+  default: return REG_ASSERT;
+  /* LCOV_EXCL_STOP */
+  }
+}
+
+/* End of pcre2posix.c */
--- a/third_party/pcre/pcre2posix.h
+++ b/third_party/pcre/pcre2posix.h
@ -0,0 +1,184 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language. This is
+the public header file to be #included by applications that call PCRE2 via the
+POSIX wrapper interface.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* Have to include stdlib.h in order to ensure that size_t is defined. */
+
+#include <stdlib.h>
+
+/* Allow for C++ users */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Options, mostly defined by POSIX, but with some extras. */
+
+#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
+#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
+#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
+#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
+#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
+#define REG_NOSUB     0x0020  /* Do not report what was matched */
+#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
+#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
+#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
+#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
+#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */
+#define REG_PEND      0x0800  /* GNU feature: pass end pattern by re_endp */
+#define REG_NOSPEC    0x1000  /* Maps to PCRE2_LITERAL */
+
+/* This is not used by PCRE2, but by defining it we make it easier
+to slot PCRE2 into existing programs that make POSIX calls. */
+
+#define REG_EXTENDED  0
+
+/* Error values. Not all these are relevant or used by the wrapper. */
+
+enum {
+  REG_ASSERT = 1,  /* internal error ? */
+  REG_BADBR,       /* invalid repeat counts in {} */
+  REG_BADPAT,      /* pattern error */
+  REG_BADRPT,      /* ? * + invalid */
+  REG_EBRACE,      /* unbalanced {} */
+  REG_EBRACK,      /* unbalanced [] */
+  REG_ECOLLATE,    /* collation error - not relevant */
+  REG_ECTYPE,      /* bad class */
+  REG_EESCAPE,     /* bad escape sequence */
+  REG_EMPTY,       /* empty expression */
+  REG_EPAREN,      /* unbalanced () */
+  REG_ERANGE,      /* bad range inside [] */
+  REG_ESIZE,       /* expression too big */
+  REG_ESPACE,      /* failed to get memory */
+  REG_ESUBREG,     /* bad back reference */
+  REG_INVARG,      /* bad argument */
+  REG_NOMATCH      /* match failed */
+};
+
+
+/* The structure representing a compiled regular expression. It is also used
+for passing the pattern end pointer when REG_PEND is set. */
+
+typedef struct {
+  void *re_pcre2_code;
+  void *re_match_data;
+  const char *re_endp;
+  size_t re_nsub;
+  size_t re_erroffset;
+  int re_cflags;
+} regex_t;
+
+/* The structure in which a captured offset is returned. */
+
+typedef int regoff_t;
+
+typedef struct {
+  regoff_t rm_so;
+  regoff_t rm_eo;
+} regmatch_t;
+
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+  void __cdecl function(....)
+
+might be needed. In order to make this easy, all the exported functions have
+PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE2_CALL_CONVENTION
+#define PCRE2_CALL_CONVENTION
+#endif
+
+/* When an application links to a PCRE2 DLL in Windows, the symbols that are
+imported have to be identified as such. When building PCRE2, the appropriate
+export settings are needed, and are set in pcre2posix.c before including this
+file. */
+
+#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL)
+#  define PCRE2POSIX_EXP_DECL  extern __declspec(dllimport)
+#  define PCRE2POSIX_EXP_DEFN  __declspec(dllimport)
+#endif
+
+/* By default, we use the standard "extern" declarations. */
+
+#ifndef PCRE2POSIX_EXP_DECL
+#  ifdef __cplusplus
+#    define PCRE2POSIX_EXP_DECL  extern "C"
+#    define PCRE2POSIX_EXP_DEFN  extern "C"
+#  else
+#    define PCRE2POSIX_EXP_DECL  extern
+#    define PCRE2POSIX_EXP_DEFN  extern
+#  endif
+#endif
+
+/* The functions. The actual code is in functions with pcre2_xxx names for
+uniqueness. POSIX names are provided as macros for API compatibility with POSIX
+regex functions. It's done this way to ensure to they are always linked from
+the PCRE2 library and not by accident from elsewhere (regex_t differs in size
+elsewhere). */
+
+PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regcomp(regex_t *, const char *, int);
+PCRE2POSIX_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_regexec(const regex_t *, const char *, size_t,
+                     regmatch_t *, int);
+PCRE2POSIX_EXP_DECL size_t PCRE2_CALL_CONVENTION pcre2_regerror(int, const regex_t *, char *, size_t);
+PCRE2POSIX_EXP_DECL void PCRE2_CALL_CONVENTION pcre2_regfree(regex_t *);
+
+#define regcomp  pcre2_regcomp
+#define regexec  pcre2_regexec
+#define regerror pcre2_regerror
+#define regfree  pcre2_regfree
+
+/* Debian had a patch that used different names. These are now here to save
+them having to maintain their own patch, but are not documented by PCRE2. */
+
+#define PCRE2regcomp  pcre2_regcomp
+#define PCRE2regexec  pcre2_regexec
+#define PCRE2regerror pcre2_regerror
+#define PCRE2regfree  pcre2_regfree
+
+#ifdef __cplusplus
+}   /* extern "C" */
+#endif
+
+/* End of pcre2posix.h */
--- a/third_party/pcre/pcre2posix_test.c
+++ b/third_party/pcre/pcre2posix_test.c
@ -0,0 +1,209 @@
+/*************************************************
+*        PCRE2 POSIX interface test program      *
+*************************************************/
+
+/*
+Written by Philip Hazel, December 2022
+Copyright (c) 2022
+File last edited: December 2022
+
+This program tests the POSIX wrapper to the PCRE2 regular expression library.
+The main PCRE2 test program is pcre2test, which also tests these function
+calls. This little program is needed to test the case where the client includes
+pcre2posix.h but not pcre2.h, mainly to make sure that it builds successfully.
+However, the code is written as a flexible test program to which extra tests
+can be added.
+
+Compile with -lpcre2-posix -lpcre2-8
+
+If run with no options, there is no output on success, and the return code is
+zero. If any test fails there is output to stderr, and the return code is 1.
+
+For testing purposes, the "-v" option causes verification output to be written
+to stdout. */
+
+#include <stdio.h>
+#include <string.h>
+#include "pcre2posix.h"
+
+#define CAPCOUNT 5               /* Number of captures supported */
+#define PRINTF if (v) printf     /* Shorthand for testing output */
+
+/* This vector contains compiler flags for each pattern that is tested. */
+
+static int cflags[] = {
+  0,           /* Test 0 */
+  REG_ICASE,   /* Test 1 */
+  0,           /* Test 2 */
+  REG_NEWLINE, /* Test 3 */
+  0            /* Test 4 */
+};
+
+/* This vector contains match flags for each pattern that is tested. */
+
+static int mflags[] = {
+  0,           /* Test 0 */
+  0,           /* Test 1 */
+  0,           /* Test 2 */
+  REG_NOTBOL,  /* Test 3 */
+  0            /* Test 4 */
+};
+
+/* Automate the number of patterns */
+
+#define count (int)(sizeof(cflags)/sizeof(int))
+
+/* The data for each pattern consists of a pattern string, followed by any
+number of subject strings, terminated by NULL. Some tests share data, but use
+different flags. */
+
+static const char *data0_1[] = { "posix", "lower posix", "upper POSIX", NULL };
+static const char *data2_3[] = { "^(cat|dog)", "catastrophic\ncataclysm",
+  "dogfight", "no animals", NULL };
+static const char *data4[] = { "*badpattern", NULL };
+
+/* Index the data strings */
+
+static char **data[] = {
+  (char **)(&data0_1),
+  (char **)(&data0_1),
+  (char **)(&data2_3),
+  (char **)(&data2_3),
+  (char **)(&data4)
+};
+
+/* The expected results for each pattern consist of a compiler return code,
+optionally followed, for each subject string, by a match return code and, for a
+successful match, up to CAPCOUNT pairs of returned match data. */
+
+static int results0[] = {
+  0,             /* Compiler rc */
+  0, 6, 11,      /* 1st match */
+  REG_NOMATCH    /* 2nd match */
+};
+
+static int results1[] = {
+  0,             /* Compiler rc */
+  0, 6, 11,      /* 1st match */
+  0, 6, 11       /* 2nd match */
+};
+
+static int results2[] = {
+  0,             /* Compiler rc */
+  0, 0, 3, 0, 3, /* 1st match */
+  0, 0, 3, 0, 3, /* 2nd match */
+  REG_NOMATCH    /* 3rd match */
+};
+
+static int results3[] = {
+  0,                 /* Compiler rc */
+  0, 13, 16, 13, 16, /* 1st match */
+  REG_NOMATCH,       /* 2nd match */
+  REG_NOMATCH        /* 3rd match */
+};
+
+static int results4[] = {
+  REG_BADRPT         /* Compiler rc */
+};
+
+/* Index the result vectors */
+
+static int *results[] = {
+  (int *)(&results0),
+  (int *)(&results1),
+  (int *)(&results2),
+  (int *)(&results3),
+  (int *)(&results4)
+};
+
+/* And here is the program */
+
+int main(int argc, char **argv)
+{
+regex_t re;
+regmatch_t match[CAPCOUNT];
+int v = argc > 1 && strcmp(argv[1], "-v") == 0;
+
+PRINTF("Test of pcre2posix.h without pcre2.h\n");
+
+for (int i = 0; i < count; i++)
+  {
+  char *pattern = data[i][0];
+  char **subjects = data[i] + 1;
+  int *rd = results[i];
+  int rc = regcomp(&re, pattern, cflags[i]);
+
+  PRINTF("Pattern: %s flags=0x%02x\n", pattern, cflags[i]);
+
+  if (rc != *rd)
+    {
+    fprintf(stderr, "Unexpected compile error %d (expected %d)\n", rc, *rd);
+    fprintf(stderr, "Pattern is: %s\n", pattern);
+    return 1;
+    }
+
+  if (rc != 0)
+    {
+    if (v)
+      {
+      char buffer[256];
+      (void)regerror(rc, &re, buffer, sizeof(buffer));
+      PRINTF("Compile error %d: %s (expected)\n", rc, buffer);
+      }
+    continue;
+    }
+
+  for (; *subjects != NULL; subjects++)
+    {
+    rc = regexec(&re, *subjects, CAPCOUNT, match, mflags[i]);
+
+    PRINTF("Subject: %s\n", *subjects);
+    PRINTF("Return:  %d", rc);
+
+    if (rc != *(++rd))
+      {
+      PRINTF("\n");
+      fprintf(stderr, "Unexpected match error %d (expected %d)\n", rc, *rd);
+      fprintf(stderr, "Pattern is: %s\n", pattern);
+      fprintf(stderr, "Subject is: %s\n", *subjects);
+      return 1;
+      }
+
+    if (rc == 0)
+      {
+      for (int j = 0; j < CAPCOUNT; j++)
+        {
+        regmatch_t *m = match + j;
+        if (m->rm_so < 0) continue;
+        if (m->rm_so != *(++rd) || m->rm_eo != *(++rd))
+          {
+          PRINTF("\n");
+          fprintf(stderr, "Mismatched results for successful match\n");
+          fprintf(stderr, "Pattern is: %s\n", pattern);
+          fprintf(stderr, "Subject is: %s\n", *subjects);
+          fprintf(stderr, "Result %d: expected %d %d received %d %d\n",
+            j, rd[-1], rd[0], m->rm_so, m->rm_eo);
+          return 1;
+          }
+        PRINTF(" (%d %d %d)", j, m->rm_so, m->rm_eo);
+        }
+      }
+
+    else if (v)
+      {
+      char buffer[256];
+      (void)regerror(rc, &re, buffer, sizeof(buffer));
+      PRINTF(": %s (expected)", buffer);
+      }
+
+    PRINTF("\n");
+    }
+
+  regfree(&re);
+  }
+
+PRINTF("End of test\n");
+return 0;
+}
+
+/* End of pcre2posix_test.c */