From 8f50e5721df662884e908000f4e4a29ee6816f53 Mon Sep 17 00:00:00 2001 From: David Adam Date: Tue, 12 May 2020 11:19:19 +0800 Subject: [PATCH] pcre2: re-import some sources overzealously removed in 73ecf1576b4a0 These are required for the build to succeed without CMake surgery. --- pcre2/libpcre2-16.pc.in | 13 + pcre2/libpcre2-32.pc.in | 13 + pcre2/libpcre2-8.pc.in | 13 + pcre2/libpcre2-posix.pc.in | 13 + pcre2/pcre2-config.in | 121 + pcre2/src/pcre2_jit_compile.c | 14154 ++++++++++++++++++++++++++++++++ pcre2/src/pcre2_jit_match.c | 186 + pcre2/src/pcre2_jit_misc.c | 232 + 8 files changed, 14745 insertions(+) create mode 100644 pcre2/libpcre2-16.pc.in create mode 100644 pcre2/libpcre2-32.pc.in create mode 100644 pcre2/libpcre2-8.pc.in create mode 100644 pcre2/libpcre2-posix.pc.in create mode 100644 pcre2/pcre2-config.in create mode 100644 pcre2/src/pcre2_jit_compile.c create mode 100644 pcre2/src/pcre2_jit_match.c create mode 100644 pcre2/src/pcre2_jit_misc.c diff --git a/pcre2/libpcre2-16.pc.in b/pcre2/libpcre2-16.pc.in new file mode 100644 index 000000000..978040dfe --- /dev/null +++ b/pcre2/libpcre2-16.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-16 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-16 +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-32.pc.in b/pcre2/libpcre2-32.pc.in new file mode 100644 index 000000000..d8fb18713 --- /dev/null +++ b/pcre2/libpcre2-32.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-32 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-32 +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-8.pc.in b/pcre2/libpcre2-8.pc.in new file mode 100644 index 000000000..5c872d0b3 --- /dev/null +++ b/pcre2/libpcre2-8.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-8 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-8 +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/pcre2/libpcre2-posix.pc.in b/pcre2/libpcre2-posix.pc.in new file mode 100644 index 000000000..96415558e --- /dev/null +++ b/pcre2/libpcre2-posix.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-posix +Description: Posix compatible interface to libpcre2-8 +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-posix +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ +Requires.private: libpcre2-8 diff --git a/pcre2/pcre2-config.in b/pcre2/pcre2-config.in new file mode 100644 index 000000000..74271c088 --- /dev/null +++ b/pcre2/pcre2-config.in @@ -0,0 +1,121 @@ +#!/bin/sh + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +exec_prefix_set=no + +cflags="[--cflags]" +libs= + +if test @enable_pcre2_16@ = yes ; then + libs="[--libs16] $libs" +fi + +if test @enable_pcre2_32@ = yes ; then + libs="[--libs32] $libs" +fi + +if test @enable_pcre2_8@ = yes ; then + libs="[--libs8] [--libs-posix] $libs" + cflags="$cflags [--cflags-posix]" +fi + +usage="Usage: pcre2-config [--prefix] [--exec-prefix] [--version] $libs $cflags" + +if test $# -eq 0; then + echo "${usage}" 1>&2 + exit 1 +fi + +libR= +case `uname -s` in + *SunOS*) + libR=" -R@libdir@" + ;; + *BSD*) + libR=" -Wl,-R@libdir@" + ;; +esac + +libS= +if test @libdir@ != /usr/lib ; then + libS=-L@libdir@ +fi + +while test $# -gt 0; do + case "$1" in + -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + case $1 in + --prefix=*) + prefix=$optarg + if test $exec_prefix_set = no ; then + exec_prefix=$optarg + fi + ;; + --prefix) + echo $prefix + ;; + --exec-prefix=*) + exec_prefix=$optarg + exec_prefix_set=yes + ;; + --exec-prefix) + echo $exec_prefix + ;; + --version) + echo @PACKAGE_VERSION@ + ;; + --cflags) + if test @includedir@ != /usr/include ; then + includes=-I@includedir@ + fi + echo $includes @PCRE2_STATIC_CFLAG@ + ;; + --cflags-posix) + if test @enable_pcre2_8@ = yes ; then + if test @includedir@ != /usr/include ; then + includes=-I@includedir@ + fi + echo $includes @PCRE2_STATIC_CFLAG@ + else + echo "${usage}" 1>&2 + fi + ;; + --libs-posix) + if test @enable_pcre2_8@ = yes ; then + echo $libS$libR -lpcre2-posix -lpcre2-8 + else + echo "${usage}" 1>&2 + fi + ;; + --libs8) + if test @enable_pcre2_8@ = yes ; then + echo $libS$libR -lpcre2-8 + else + echo "${usage}" 1>&2 + fi + ;; + --libs16) + if test @enable_pcre2_16@ = yes ; then + echo $libS$libR -lpcre2-16 + else + echo "${usage}" 1>&2 + fi + ;; + --libs32) + if test @enable_pcre2_32@ = yes ; then + echo $libS$libR -lpcre2-32 + else + echo "${usage}" 1>&2 + fi + ;; + *) + echo "${usage}" 1>&2 + exit 1 + ;; + esac + shift +done diff --git a/pcre2/src/pcre2_jit_compile.c b/pcre2/src/pcre2_jit_compile.c new file mode 100644 index 000000000..33ad7e655 --- /dev/null +++ b/pcre2/src/pcre2_jit_compile.c @@ -0,0 +1,14154 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + This module by Zoltan Herczeg + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +#ifdef SUPPORT_JIT + +/* All-in-one: Since we use the JIT compiler only from here, +we just include it. This way we don't need to touch the build +system files. */ + +#define SLJIT_CONFIG_AUTO 1 +#define SLJIT_CONFIG_STATIC 1 +#define SLJIT_VERBOSE 0 + +#ifdef PCRE2_DEBUG +#define SLJIT_DEBUG 1 +#else +#define SLJIT_DEBUG 0 +#endif + +#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data) +#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data) + +static void * pcre2_jit_malloc(size_t size, void *allocator_data) +{ +pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); +return allocator->malloc(size, allocator->memory_data); +} + +static void pcre2_jit_free(void *ptr, void *allocator_data) +{ +pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); +allocator->free(ptr, allocator->memory_data); +} + +#include "sljit/sljitLir.c" + +#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED +#error Unsupported architecture +#endif + +/* Defines for debugging purposes. */ + +/* 1 - Use unoptimized capturing brackets. + 2 - Enable capture_last_ptr (includes option 1). */ +/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ + +/* 1 - Always have a control head. */ +/* #define DEBUG_FORCE_CONTROL_HEAD 1 */ + +/* Allocate memory for the regex stack on the real machine stack. +Fast, but limited size. */ +#define MACHINE_STACK_SIZE 32768 + +/* Growth rate for stack allocated by the OS. Should be the multiply +of page size. */ +#define STACK_GROWTH_RATE 8192 + +/* Enable to check that the allocation could destroy temporaries. */ +#if defined SLJIT_DEBUG && SLJIT_DEBUG +#define DESTROY_REGISTERS 1 +#endif + +/* +Short summary about the backtracking mechanism empolyed by the jit code generator: + +The code generator follows the recursive nature of the PERL compatible regular +expressions. The basic blocks of regular expressions are condition checkers +whose execute different commands depending on the result of the condition check. +The relationship between the operators can be horizontal (concatenation) and +vertical (sub-expression) (See struct backtrack_common for more details). + + 'ab' - 'a' and 'b' regexps are concatenated + 'a+' - 'a' is the sub-expression of the '+' operator + +The condition checkers are boolean (true/false) checkers. Machine code is generated +for the checker itself and for the actions depending on the result of the checker. +The 'true' case is called as the matching path (expected path), and the other is called as +the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken +branches on the matching path. + + Greedy star operator (*) : + Matching path: match happens. + Backtrack path: match failed. + Non-greedy star operator (*?) : + Matching path: no need to perform a match. + Backtrack path: match is required. + +The following example shows how the code generated for a capturing bracket +with two alternatives. Let A, B, C, D are arbirary regular expressions, and +we have the following regular expression: + + A(B|C)D + +The generated code will be the following: + + A matching path + '(' matching path (pushing arguments to the stack) + B matching path + ')' matching path (pushing arguments to the stack) + D matching path + return with successful match + + D backtrack path + ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") + B backtrack path + C expected path + jump to D matching path + C backtrack path + A backtrack path + + Notice, that the order of backtrack code paths are the opposite of the fast + code paths. In this way the topmost value on the stack is always belong + to the current backtrack code path. The backtrack path must check + whether there is a next alternative. If so, it needs to jump back to + the matching path eventually. Otherwise it needs to clear out its own stack + frame and continue the execution on the backtrack code paths. +*/ + +/* +Saved stack frames: + +Atomic blocks and asserts require reloading the values of private data +when the backtrack mechanism performed. Because of OP_RECURSE, the data +are not necessarly known in compile time, thus we need a dynamic restore +mechanism. + +The stack frames are stored in a chain list, and have the following format: +([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] + +Thus we can restore the private data to a particular point in the stack. +*/ + +typedef struct jit_arguments { + /* Pointers first. */ + struct sljit_stack *stack; + PCRE2_SPTR str; + PCRE2_SPTR begin; + PCRE2_SPTR end; + pcre2_match_data *match_data; + PCRE2_SPTR startchar_ptr; + PCRE2_UCHAR *mark_ptr; + int (*callout)(pcre2_callout_block *, void *); + void *callout_data; + /* Everything else after. */ + sljit_uw offset_limit; + sljit_u32 limit_match; + sljit_u32 oveccount; + sljit_u32 options; +} jit_arguments; + +#define JIT_NUMBER_OF_COMPILE_MODES 3 + +typedef struct executable_functions { + void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; + void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_u32 top_bracket; + sljit_u32 limit_match; +} executable_functions; + +typedef struct jump_list { + struct sljit_jump *jump; + struct jump_list *next; +} jump_list; + +typedef struct stub_list { + struct sljit_jump *start; + struct sljit_label *quit; + struct stub_list *next; +} stub_list; + +enum frame_types { + no_frame = -1, + no_stack = -2 +}; + +enum control_types { + type_mark = 0, + type_then_trap = 1 +}; + +enum early_fail_types { + type_skip = 0, + type_fail = 1, + type_fail_range = 2 +}; + +typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); + +/* The following structure is the key data type for the recursive +code generator. It is allocated by compile_matchingpath, and contains +the arguments for compile_backtrackingpath. Must be the first member +of its descendants. */ +typedef struct backtrack_common { + /* Concatenation stack. */ + struct backtrack_common *prev; + jump_list *nextbacktracks; + /* Internal stack (for component operators). */ + struct backtrack_common *top; + jump_list *topbacktracks; + /* Opcode pointer. */ + PCRE2_SPTR cc; +} backtrack_common; + +typedef struct assert_backtrack { + backtrack_common common; + jump_list *condfailed; + /* Less than 0 if a frame is not needed. */ + int framesize; + /* Points to our private memory word on the stack. */ + int private_data_ptr; + /* For iterators. */ + struct sljit_label *matchingpath; +} assert_backtrack; + +typedef struct bracket_backtrack { + backtrack_common common; + /* Where to coninue if an alternative is successfully matched. */ + struct sljit_label *alternative_matchingpath; + /* For rmin and rmax iterators. */ + struct sljit_label *recursive_matchingpath; + /* For greedy ? operator. */ + struct sljit_label *zero_matchingpath; + /* Contains the branches of a failed condition. */ + union { + /* Both for OP_COND, OP_SCOND. */ + jump_list *condfailed; + assert_backtrack *assert; + /* For OP_ONCE. Less than 0 if not needed. */ + int framesize; + /* For brackets with >3 alternatives. */ + struct sljit_put_label *matching_put_label; + } u; + /* Points to our private memory word on the stack. */ + int private_data_ptr; +} bracket_backtrack; + +typedef struct bracketpos_backtrack { + backtrack_common common; + /* Points to our private memory word on the stack. */ + int private_data_ptr; + /* Reverting stack is needed. */ + int framesize; + /* Allocated stack size. */ + int stacksize; +} bracketpos_backtrack; + +typedef struct braminzero_backtrack { + backtrack_common common; + struct sljit_label *matchingpath; +} braminzero_backtrack; + +typedef struct char_iterator_backtrack { + backtrack_common common; + /* Next iteration. */ + struct sljit_label *matchingpath; + union { + jump_list *backtracks; + struct { + unsigned int othercasebit; + PCRE2_UCHAR chr; + BOOL enabled; + } charpos; + } u; +} char_iterator_backtrack; + +typedef struct ref_iterator_backtrack { + backtrack_common common; + /* Next iteration. */ + struct sljit_label *matchingpath; +} ref_iterator_backtrack; + +typedef struct recurse_entry { + struct recurse_entry *next; + /* Contains the function entry label. */ + struct sljit_label *entry_label; + /* Contains the function entry label. */ + struct sljit_label *backtrack_label; + /* Collects the entry calls until the function is not created. */ + jump_list *entry_calls; + /* Collects the backtrack calls until the function is not created. */ + jump_list *backtrack_calls; + /* Points to the starting opcode. */ + sljit_sw start; +} recurse_entry; + +typedef struct recurse_backtrack { + backtrack_common common; + /* Return to the matching path. */ + struct sljit_label *matchingpath; + /* Recursive pattern. */ + recurse_entry *entry; + /* Pattern is inlined. */ + BOOL inlined_pattern; +} recurse_backtrack; + +#define OP_THEN_TRAP OP_TABLE_LENGTH + +typedef struct then_trap_backtrack { + backtrack_common common; + /* If then_trap is not NULL, this structure contains the real + then_trap for the backtracking path. */ + struct then_trap_backtrack *then_trap; + /* Points to the starting opcode. */ + sljit_sw start; + /* Exit point for the then opcodes of this alternative. */ + jump_list *quit; + /* Frame size of the current alternative. */ + int framesize; +} then_trap_backtrack; + +#define MAX_N_CHARS 12 +#define MAX_DIFF_CHARS 5 + +typedef struct fast_forward_char_data { + /* Number of characters in the chars array, 255 for any character. */ + sljit_u8 count; + /* Number of last UTF-8 characters in the chars array. */ + sljit_u8 last_count; + /* Available characters in the current position. */ + PCRE2_UCHAR chars[MAX_DIFF_CHARS]; +} fast_forward_char_data; + +#define MAX_CLASS_RANGE_SIZE 4 +#define MAX_CLASS_CHARS_SIZE 3 + +typedef struct compiler_common { + /* The sljit ceneric compiler. */ + struct sljit_compiler *compiler; + /* Compiled regular expression. */ + pcre2_real_code *re; + /* First byte code. */ + PCRE2_SPTR start; + /* Maps private data offset to each opcode. */ + sljit_s32 *private_data_ptrs; + /* Chain list of read-only data ptrs. */ + void *read_only_data_head; + /* Tells whether the capturing bracket is optimized. */ + sljit_u8 *optimized_cbracket; + /* Tells whether the starting offset is a target of then. */ + sljit_u8 *then_offsets; + /* Current position where a THEN must jump. */ + then_trap_backtrack *then_trap; + /* Starting offset of private data for capturing brackets. */ + sljit_s32 cbra_ptr; + /* Output vector starting point. Must be divisible by 2. */ + sljit_s32 ovector_start; + /* Points to the starting character of the current match. */ + sljit_s32 start_ptr; + /* Last known position of the requested byte. */ + sljit_s32 req_char_ptr; + /* Head of the last recursion. */ + sljit_s32 recursive_head_ptr; + /* First inspected character for partial matching. + (Needed for avoiding zero length partial matches.) */ + sljit_s32 start_used_ptr; + /* Starting pointer for partial soft matches. */ + sljit_s32 hit_start; + /* Pointer of the match end position. */ + sljit_s32 match_end_ptr; + /* Points to the marked string. */ + sljit_s32 mark_ptr; + /* Recursive control verb management chain. */ + sljit_s32 control_head_ptr; + /* Points to the last matched capture block index. */ + sljit_s32 capture_last_ptr; + /* Fast forward skipping byte code pointer. */ + PCRE2_SPTR fast_forward_bc_ptr; + /* Locals used by fast fail optimization. */ + sljit_s32 early_fail_start_ptr; + sljit_s32 early_fail_end_ptr; + + /* Flipped and lower case tables. */ + const sljit_u8 *fcc; + sljit_sw lcc; + /* Mode can be PCRE2_JIT_COMPLETE and others. */ + int mode; + /* TRUE, when empty match is accepted for partial matching. */ + BOOL allow_empty_partial; + /* TRUE, when minlength is greater than 0. */ + BOOL might_be_empty; + /* \K is found in the pattern. */ + BOOL has_set_som; + /* (*SKIP:arg) is found in the pattern. */ + BOOL has_skip_arg; + /* (*THEN) is found in the pattern. */ + BOOL has_then; + /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ + BOOL has_skip_in_assert_back; + /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */ + BOOL local_quit_available; + /* Currently in a positive assertion. */ + BOOL in_positive_assertion; + /* Newline control. */ + int nltype; + sljit_u32 nlmax; + sljit_u32 nlmin; + int newline; + int bsr_nltype; + sljit_u32 bsr_nlmax; + sljit_u32 bsr_nlmin; + /* Dollar endonly. */ + int endonly; + /* Tables. */ + sljit_sw ctypes; + /* Named capturing brackets. */ + PCRE2_SPTR name_table; + sljit_sw name_count; + sljit_sw name_entry_size; + + /* Labels and jump lists. */ + struct sljit_label *partialmatchlabel; + struct sljit_label *quit_label; + struct sljit_label *abort_label; + struct sljit_label *accept_label; + struct sljit_label *ff_newline_shortcut; + stub_list *stubs; + recurse_entry *entries; + recurse_entry *currententry; + jump_list *partialmatch; + jump_list *quit; + jump_list *positive_assertion_quit; + jump_list *abort; + jump_list *failed_match; + jump_list *accept; + jump_list *calllimit; + jump_list *stackalloc; + jump_list *revertframes; + jump_list *wordboundary; + jump_list *anynewline; + jump_list *hspace; + jump_list *vspace; + jump_list *casefulcmp; + jump_list *caselesscmp; + jump_list *reset_match; + BOOL unset_backref; + BOOL alt_circumflex; +#ifdef SUPPORT_UNICODE + BOOL utf; + BOOL invalid_utf; + BOOL ucp; + /* Points to saving area for iref. */ + sljit_s32 iref_ptr; + jump_list *getucd; + jump_list *getucdtype; +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump_list *utfreadchar; + jump_list *utfreadtype8; + jump_list *utfpeakcharback; +#endif +#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 + jump_list *utfreadchar_invalid; + jump_list *utfreadnewline_invalid; + jump_list *utfmoveback_invalid; + jump_list *utfpeakcharback_invalid; +#endif +#endif /* SUPPORT_UNICODE */ +} compiler_common; + +/* For byte_sequence_compare. */ + +typedef struct compare_context { + int length; + int sourcereg; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + int ucharptr; + union { + sljit_s32 asint; + sljit_u16 asushort; +#if PCRE2_CODE_UNIT_WIDTH == 8 + sljit_u8 asbyte; + sljit_u8 asuchars[4]; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + sljit_u16 asuchars[2]; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + sljit_u32 asuchars[1]; +#endif + } c; + union { + sljit_s32 asint; + sljit_u16 asushort; +#if PCRE2_CODE_UNIT_WIDTH == 8 + sljit_u8 asbyte; + sljit_u8 asuchars[4]; +#elif PCRE2_CODE_UNIT_WIDTH == 16 + sljit_u16 asuchars[2]; +#elif PCRE2_CODE_UNIT_WIDTH == 32 + sljit_u32 asuchars[1]; +#endif + } oc; +#endif +} compare_context; + +/* Undefine sljit macros. */ +#undef CMP + +/* Used for accessing the elements of the stack. */ +#define STACK(i) ((i) * (int)sizeof(sljit_sw)) + +#ifdef SLJIT_PREF_SHIFT_REG +#if SLJIT_PREF_SHIFT_REG == SLJIT_R2 +/* Nothing. */ +#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3 +#define SHIFT_REG_IS_R3 +#else +#error "Unsupported shift register" +#endif +#endif + +#define TMP1 SLJIT_R0 +#ifdef SHIFT_REG_IS_R3 +#define TMP2 SLJIT_R3 +#define TMP3 SLJIT_R2 +#else +#define TMP2 SLJIT_R2 +#define TMP3 SLJIT_R3 +#endif +#define STR_PTR SLJIT_R1 +#define STR_END SLJIT_S0 +#define STACK_TOP SLJIT_S1 +#define STACK_LIMIT SLJIT_S2 +#define COUNT_MATCH SLJIT_S3 +#define ARGUMENTS SLJIT_S4 +#define RETURN_ADDR SLJIT_R4 + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define HAS_VIRTUAL_REGISTERS 1 +#else +#define HAS_VIRTUAL_REGISTERS 0 +#endif + +/* Local space layout. */ +/* These two locals can be used by the current opcode. */ +#define LOCALS0 (0 * sizeof(sljit_sw)) +#define LOCALS1 (1 * sizeof(sljit_sw)) +/* Two local variables for possessive quantifiers (char1 cannot use them). */ +#define POSSESSIVE0 (2 * sizeof(sljit_sw)) +#define POSSESSIVE1 (3 * sizeof(sljit_sw)) +/* Max limit of recursions. */ +#define LIMIT_MATCH (4 * sizeof(sljit_sw)) +/* The output vector is stored on the stack, and contains pointers +to characters. The vector data is divided into two groups: the first +group contains the start / end character pointers, and the second is +the start pointers when the end of the capturing group has not yet reached. */ +#define OVECTOR_START (common->ovector_start) +#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) +#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define MOV_UCHAR SLJIT_MOV_U8 +#define IN_UCHARS(x) (x) +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define MOV_UCHAR SLJIT_MOV_U16 +#define UCHAR_SHIFT (1) +#define IN_UCHARS(x) ((x) * 2) +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define MOV_UCHAR SLJIT_MOV_U32 +#define UCHAR_SHIFT (2) +#define IN_UCHARS(x) ((x) * 4) +#else +#error Unsupported compiling mode +#endif + +/* Shortcuts. */ +#define DEFINE_COMPILER \ + struct sljit_compiler *compiler = common->compiler +#define OP1(op, dst, dstw, src, srcw) \ + sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) +#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ + sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP_SRC(op, src, srcw) \ + sljit_emit_op_src(compiler, (op), (src), (srcw)) +#define LABEL() \ + sljit_emit_label(compiler) +#define JUMP(type) \ + sljit_emit_jump(compiler, (type)) +#define JUMPTO(type, label) \ + sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) +#define JUMPHERE(jump) \ + sljit_set_label((jump), sljit_emit_label(compiler)) +#define SET_LABEL(jump, label) \ + sljit_set_label((jump), (label)) +#define CMP(type, src1, src1w, src2, src2w) \ + sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) +#define CMPTO(type, src1, src1w, src2, src2w, label) \ + sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) +#define OP_FLAGS(op, dst, dstw, type) \ + sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type)) +#define CMOV(type, dst_reg, src, srcw) \ + sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw)) +#define GET_LOCAL_BASE(dst, dstw, offset) \ + sljit_get_local_base(compiler, (dst), (dstw), (offset)) + +#define READ_CHAR_MAX 0x7fffffff + +#define INVALID_UTF_CHAR -1 +#define UNASSIGNED_UTF_CHAR 888 + +#if defined SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] <= 0x7f) \ + c = *ptr++; \ + else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \ + { \ + c = ptr[1] - 0x80; \ + \ + if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \ + { \ + c |= (ptr[0] - 0xc0) << 6; \ + ptr += 2; \ + } \ + else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \ + { \ + c = c << 6 | (ptr[2] - 0x80); \ + \ + if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \ + { \ + c |= (ptr[0] - 0xe0) << 12; \ + ptr += 3; \ + \ + if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \ + { \ + invalid_action; \ + } \ + } \ + else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \ + { \ + c = c << 6 | (ptr[3] - 0x80); \ + \ + if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \ + { \ + c |= (ptr[0] - 0xf0) << 18; \ + ptr += 4; \ + \ + if (c >= 0x110000 || c < 0x10000) \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (c <= 0x7f) \ + ptr--; \ + else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \ + { \ + c -= 0x80; \ + \ + if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \ + { \ + c |= (ptr[-2] - 0xc0) << 6; \ + ptr -= 2; \ + } \ + else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \ + { \ + c = c << 6 | (ptr[-2] - 0x80); \ + \ + if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \ + { \ + c |= (ptr[-3] - 0xe0) << 12; \ + ptr -= 3; \ + \ + if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \ + { \ + invalid_action; \ + } \ + } \ + else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \ + { \ + c = c << 6 | (ptr[-3] - 0x80); \ + \ + if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \ + { \ + c |= (ptr[-4] - 0xf0) << 18; \ + ptr -= 4; \ + \ + if (c >= 0x110000 || c < 0x10000) \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#elif PCRE2_CODE_UNIT_WIDTH == 16 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \ + c = *ptr++; \ + else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \ + { \ + c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \ + ptr += 2; \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (c < 0xd800 || c >= 0xe000) \ + ptr--; \ + else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \ + { \ + c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \ + ptr -= 2; \ + } \ + else \ + { \ + invalid_action; \ + } \ + } + + +#elif PCRE2_CODE_UNIT_WIDTH == 32 + +#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ + { \ + if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \ + c = *ptr++; \ + else \ + { \ + invalid_action; \ + } \ + } + +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \ + ptr--; \ + else \ + { \ + invalid_action; \ + } \ + } + +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ + +static PCRE2_SPTR bracketend(PCRE2_SPTR cc) +{ +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +do cc += GET(cc, 1); while (*cc == OP_ALT); +SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); +cc += 1 + LINK_SIZE; +return cc; +} + +static int no_alternatives(PCRE2_SPTR cc) +{ +int count = 0; +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +do + { + cc += GET(cc, 1); + count++; + } +while (*cc == OP_ALT); +SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); +return count; +} + +/* Functions whose might need modification for all new supported opcodes: + next_opcode + check_opcode_types + set_private_data_ptrs + get_framesize + init_frame + get_recurse_data_length + copy_recurse_data + compile_matchingpath + compile_backtrackingpath +*/ + +static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc) +{ +SLJIT_UNUSED_ARG(common); +switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + case OP_CRPOSRANGE: + case OP_CLASS: + case OP_NCLASS: + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + case OP_RECURSE: + case OP_CALLOUT: + case OP_ALT: + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_REVERSE: + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + case OP_COND: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + case OP_SCOND: + case OP_CREF: + case OP_DNCREF: + case OP_RREF: + case OP_DNRREF: + case OP_FALSE: + case OP_TRUE: + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + case OP_PRUNE: + case OP_SKIP: + case OP_THEN: + case OP_COMMIT: + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + case OP_CLOSE: + case OP_SKIPZERO: + return cc + PRIV(OP_lengths)[*cc]; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + cc += PRIV(OP_lengths)[*cc]; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + return cc; + + /* Special cases. */ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + return cc + PRIV(OP_lengths)[*cc] - 1; + + case OP_ANYBYTE: +#ifdef SUPPORT_UNICODE + if (common->utf) return NULL; +#endif + return cc + 1; + + case OP_CALLOUT_STR: + return cc + GET(cc, 1 + 2*LINK_SIZE); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + return cc + GET(cc, 1); +#endif + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + return cc + 1 + 2 + cc[1]; + + default: + SLJIT_UNREACHABLE(); + return NULL; + } +} + +static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend) +{ +int count; +PCRE2_SPTR slot; +PCRE2_SPTR assert_back_end = cc - 1; +PCRE2_SPTR assert_na_end = cc - 1; + +/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ +while (cc < ccend) + { + switch(*cc) + { + case OP_SET_SOM: + common->has_set_som = TRUE; + common->might_be_empty = TRUE; + cc += 1; + break; + + case OP_REFI: +#ifdef SUPPORT_UNICODE + if (common->iref_ptr == 0) + { + common->iref_ptr = common->ovector_start; + common->ovector_start += 3 * sizeof(sljit_sw); + } +#endif /* SUPPORT_UNICODE */ + /* Fall through. */ + case OP_REF: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + slot = bracketend(cc); + if (slot > assert_na_end) + assert_na_end = slot; + cc += 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + case OP_SCOND: + /* Only AUTO_CALLOUT can insert this opcode. We do + not intend to support this case. */ + if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR) + return FALSE; + cc += 1 + LINK_SIZE; + break; + + case OP_CREF: + common->optimized_cbracket[GET2(cc, 1)] = 0; + cc += 1 + IMM2_SIZE; + break; + + case OP_DNREF: + case OP_DNREFI: + case OP_DNCREF: + count = GET2(cc, 1 + IMM2_SIZE); + slot = common->name_table + GET2(cc, 1) * common->name_entry_size; + while (count-- > 0) + { + common->optimized_cbracket[GET2(slot, 0)] = 0; + slot += common->name_entry_size; + } + cc += 1 + 2 * IMM2_SIZE; + break; + + case OP_RECURSE: + /* Set its value only once. */ + if (common->recursive_head_ptr == 0) + { + common->recursive_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + LINK_SIZE; + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + if (common->capture_last_ptr == 0) + { + common->capture_last_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); + break; + + case OP_ASSERTBACK: + slot = bracketend(cc); + if (slot > assert_back_end) + assert_back_end = slot; + cc += 1 + LINK_SIZE; + break; + + case OP_THEN_ARG: + common->has_then = TRUE; + common->control_head_ptr = 1; + /* Fall through. */ + + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + if (cc < assert_na_end) + return FALSE; + /* Fall through */ + case OP_MARK: + if (common->mark_ptr == 0) + { + common->mark_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + common->has_then = TRUE; + common->control_head_ptr = 1; + cc += 1; + break; + + case OP_SKIP: + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; + cc += 1; + break; + + case OP_SKIP_ARG: + common->control_head_ptr = 1; + common->has_skip_arg = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_COMMIT: + case OP_ASSERT_ACCEPT: + if (cc < assert_na_end) + return FALSE; + cc++; + break; + + default: + cc = next_opcode(common, cc); + if (cc == NULL) + return FALSE; + break; + } + } +return TRUE; +} + +#define EARLY_FAIL_ENHANCE_MAX (1 + 1) + +/* +start: + 0 - skip / early fail allowed + 1 - only early fail with range allowed + >1 - (start - 1) early fail is processed + +return: current number of iterators enhanced with fast fail +*/ +static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) +{ +PCRE2_SPTR next_alt; +PCRE2_SPTR end; +PCRE2_SPTR accelerated_start; +int result = 0; +int count; +BOOL fast_forward_allowed = TRUE; + +SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA); +SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); +SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX); + +do + { + count = start; + next_alt = cc + GET(cc, 1); + cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); + + while (TRUE) + { + accelerated_start = NULL; + + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + fast_forward_allowed = FALSE; + cc++; + continue; + + case OP_ANYNL: + case OP_EXTUNI: + fast_forward_allowed = FALSE; + if (count == 0) + count = 1; + cc++; + continue; + + case OP_NOTPROP: + case OP_PROP: + fast_forward_allowed = FALSE; + cc += 1 + 2; + continue; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + fast_forward_allowed = FALSE; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + /* The type or prop opcode is skipped in the next iteration. */ + cc += 1; + + if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI) + { + accelerated_start = cc - 1; + break; + } + + if (count == 0) + count = 1; + fast_forward_allowed = FALSE; + continue; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSUPTO: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + /* The type or prop opcode is skipped in the next iteration. */ + fast_forward_allowed = FALSE; + if (count == 0) + count = 1; + cc += 1; + continue; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSSTAR: + case OP_POSPLUS: + + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSSTARI: + case OP_POSPLUSI: + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + accelerated_start = cc; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSUPTO: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSUPTOI: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSUPTO: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSUPTOI: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + fast_forward_allowed = FALSE; + if (count == 0) + count = 1; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + accelerated_start = cc; + cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); +#else + accelerated_start = cc; + cc += (1 + (32 / sizeof(PCRE2_UCHAR))); +#endif + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + cc += 2 * IMM2_SIZE; + /* Fall through */ + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + if (count == 0) + count = 1; + /* Fall through */ + default: + accelerated_start = NULL; + fast_forward_allowed = FALSE; + break; + } + continue; + + case OP_ONCE: + case OP_BRA: + case OP_CBRA: + end = cc + GET(cc, 1); + + if (*end == OP_KET && PRIVATE_DATA(end) == 0) + { + if (*cc == OP_CBRA) + { + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + break; + cc += IMM2_SIZE; + } + + cc += 1 + LINK_SIZE; + continue; + } + + fast_forward_allowed = FALSE; + if (depth >= 4) + break; + + end = bracketend(cc) - (1 + LINK_SIZE); + if (*end != OP_KET || PRIVATE_DATA(end) != 0) + break; + + if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + break; + + count = detect_early_fail(common, cc, private_data_start, depth + 1, count); + if (count < EARLY_FAIL_ENHANCE_MAX) + { + cc = end + (1 + LINK_SIZE); + continue; + } + break; + + case OP_KET: + SLJIT_ASSERT(PRIVATE_DATA(cc) == 0); + if (cc >= next_alt) + break; + cc += 1 + LINK_SIZE; + continue; + } + + if (accelerated_start != NULL) + { + if (count == 0) + { + count++; + + if (fast_forward_allowed && *next_alt == OP_KET) + { + common->fast_forward_bc_ptr = accelerated_start; + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; + *private_data_start += sizeof(sljit_sw); + } + else + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail; + + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; + + *private_data_start += sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; + } + } + else + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range; + + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; + + *private_data_start += 2 * sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; + } + + count++; + + if (count < EARLY_FAIL_ENHANCE_MAX) + continue; + } + + break; + } + + if (*cc != OP_ALT && *cc != OP_KET) + result = EARLY_FAIL_ENHANCE_MAX; + else if (result < count) + result = count; + + fast_forward_allowed = FALSE; + cc = next_alt; + } +while (*cc == OP_ALT); + +return result; +} + +static int get_class_iterator_size(PCRE2_SPTR cc) +{ +sljit_u32 min; +sljit_u32 max; +switch(*cc) + { + case OP_CRSTAR: + case OP_CRPLUS: + return 2; + + case OP_CRMINSTAR: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + return 1; + + case OP_CRRANGE: + case OP_CRMINRANGE: + min = GET2(cc, 1); + max = GET2(cc, 1 + IMM2_SIZE); + if (max == 0) + return (*cc == OP_CRRANGE) ? 2 : 1; + max -= min; + if (max > 2) + max = 2; + return max; + + default: + return 0; + } +} + +static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin) +{ +PCRE2_SPTR end = bracketend(begin); +PCRE2_SPTR next; +PCRE2_SPTR next_end; +PCRE2_SPTR max_end; +PCRE2_UCHAR type; +sljit_sw length = end - begin; +sljit_s32 min, max, i; + +/* Detect fixed iterations first. */ +if (end[-(1 + LINK_SIZE)] != OP_KET) + return FALSE; + +/* Already detected repeat. */ +if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) + return TRUE; + +next = end; +min = 1; +while (1) + { + if (*next != *begin) + break; + next_end = bracketend(next); + if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) + break; + next = next_end; + min++; + } + +if (min == 2) + return FALSE; + +max = 0; +max_end = next; +if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) + { + type = *next; + while (1) + { + if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) + break; + next_end = bracketend(next + 2 + LINK_SIZE); + if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) + break; + next = next_end; + max++; + } + + if (next[0] == type && next[1] == *begin && max >= 1) + { + next_end = bracketend(next + 1); + if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) + { + for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) + if (*next_end != OP_KET) + break; + + if (i == max) + { + common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; + /* +2 the original and the last. */ + common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; + if (min == 1) + return TRUE; + min--; + max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); + } + } + } + } + +if (min >= 3) + { + common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; + common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; + common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; + return TRUE; + } + +return FALSE; +} + +#define CASE_ITERATOR_PRIVATE_DATA_1 \ + case OP_MINSTAR: \ + case OP_MINPLUS: \ + case OP_QUERY: \ + case OP_MINQUERY: \ + case OP_MINSTARI: \ + case OP_MINPLUSI: \ + case OP_QUERYI: \ + case OP_MINQUERYI: \ + case OP_NOTMINSTAR: \ + case OP_NOTMINPLUS: \ + case OP_NOTQUERY: \ + case OP_NOTMINQUERY: \ + case OP_NOTMINSTARI: \ + case OP_NOTMINPLUSI: \ + case OP_NOTQUERYI: \ + case OP_NOTMINQUERYI: + +#define CASE_ITERATOR_PRIVATE_DATA_2A \ + case OP_STAR: \ + case OP_PLUS: \ + case OP_STARI: \ + case OP_PLUSI: \ + case OP_NOTSTAR: \ + case OP_NOTPLUS: \ + case OP_NOTSTARI: \ + case OP_NOTPLUSI: + +#define CASE_ITERATOR_PRIVATE_DATA_2B \ + case OP_UPTO: \ + case OP_MINUPTO: \ + case OP_UPTOI: \ + case OP_MINUPTOI: \ + case OP_NOTUPTO: \ + case OP_NOTMINUPTO: \ + case OP_NOTUPTOI: \ + case OP_NOTMINUPTOI: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ + case OP_TYPEMINSTAR: \ + case OP_TYPEMINPLUS: \ + case OP_TYPEQUERY: \ + case OP_TYPEMINQUERY: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ + case OP_TYPESTAR: \ + case OP_TYPEPLUS: + +#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ + case OP_TYPEUPTO: \ + case OP_TYPEMINUPTO: + +static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend) +{ +PCRE2_SPTR cc = common->start; +PCRE2_SPTR alternative; +PCRE2_SPTR end = NULL; +int private_data_ptr = *private_data_start; +int space, size, bracketlen; +BOOL repeat_check = TRUE; + +while (cc < ccend) + { + space = 0; + size = 0; + bracketlen = 0; + if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) + break; + + if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) + { + if (detect_repeat(common, cc)) + { + /* These brackets are converted to repeats, so no global + based single character repeat is allowed. */ + if (cc >= end) + end = bracketend(cc); + } + } + repeat_check = TRUE; + + switch(*cc) + { + case OP_KET: + if (common->private_data_ptrs[cc + 1 - common->start] != 0) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + cc += common->private_data_ptrs[cc + 1 - common->start]; + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. */ + alternative = cc + GET(cc, 1); + if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + } + bracketlen = 1 + LINK_SIZE; + break; + + case OP_BRA: + bracketlen = 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + bracketlen = 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + repeat_check = FALSE; + size = 1; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + space = 1; + size = -2; + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + space = 2; + size = -2; + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + space = 2; + size = -(2 + IMM2_SIZE); + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + space = 1; + size = 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) + space = 2; + size = 1; + break; + + case OP_TYPEUPTO: + if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) + space = 2; + size = 1 + IMM2_SIZE; + break; + + case OP_TYPEMINUPTO: + space = 2; + size = 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: + space = get_class_iterator_size(cc + size); + size = 1 + 32 / sizeof(PCRE2_UCHAR); + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + space = get_class_iterator_size(cc + size); + size = GET(cc, 1); + break; +#endif + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + + /* Character iterators, which are not inside a repeated bracket, + gets a private slot instead of allocating it on the stack. */ + if (space > 0 && cc >= end) + { + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw) * space; + } + + if (size != 0) + { + if (size < 0) + { + cc += -size; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + } + else + cc += size; + } + + if (bracketlen > 0) + { + if (cc >= end) + { + end = bracketend(cc); + if (end[-1 - LINK_SIZE] == OP_KET) + end = NULL; + } + cc += bracketlen; + } + } +*private_data_start = private_data_ptr; +} + +/* Returns with a frame_types (always < 0) if no need for frame. */ +static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head) +{ +int length = 0; +int possessive = 0; +BOOL stack_restore = FALSE; +BOOL setsom_found = recursive; +BOOL setmark_found = recursive; +/* The last capture is a local variable even for recursions. */ +BOOL capture_last_found = FALSE; + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +*needs_control_head = TRUE; +#else +*needs_control_head = FALSE; +#endif + +if (ccend == NULL) + { + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) + { + possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; + /* This is correct regardless of common->capture_last_ptr. */ + capture_last_found = TRUE; + } + cc = next_opcode(common, cc); + } + +SLJIT_ASSERT(cc != NULL); +while (cc < ccend) + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + stack_restore = TRUE; + if (!setsom_found) + { + length += 2; + setsom_found = TRUE; + } + cc += 1; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + stack_restore = TRUE; + if (!setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + stack_restore = TRUE; + if (common->has_set_som && !setsom_found) + { + length += 2; + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + length += 2; + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + stack_restore = TRUE; + if (common->capture_last_ptr != 0 && !capture_last_found) + { + length += 2; + capture_last_found = TRUE; + } + length += 3; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_THEN: + stack_restore = TRUE; + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc ++; + break; + + default: + stack_restore = TRUE; + /* Fall through. */ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + + case OP_CALLOUT: + case OP_CALLOUT_STR: + + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + +/* Possessive quantifiers can use a special case. */ +if (SLJIT_UNLIKELY(possessive == length)) + return stack_restore ? no_frame : no_stack; + +if (length > 0) + return length + 1; +return stack_restore ? no_frame : no_stack; +} + +static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop) +{ +DEFINE_COMPILER; +BOOL setsom_found = FALSE; +BOOL setmark_found = FALSE; +/* The last capture is a local variable even for recursions. */ +BOOL capture_last_found = FALSE; +int offset; + +/* >= 1 + shortest item size (2) */ +SLJIT_UNUSED_ARG(stacktop); +SLJIT_ASSERT(stackpos >= stacktop + 2); + +stackpos = STACK(stackpos); +if (ccend == NULL) + { + ccend = bracketend(cc) - (1 + LINK_SIZE); + if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS) + cc = next_opcode(common, cc); + } + +SLJIT_ASSERT(cc != NULL); +while (cc < ccend) + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + if (!setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + setsom_found = TRUE; + } + cc += 1; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + setmark_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_RECURSE: + if (common->has_set_som && !setsom_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + setmark_found = TRUE; + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + if (common->capture_last_ptr != 0 && !capture_last_found) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + capture_last_found = TRUE; + } + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); + stackpos -= (int)sizeof(sljit_sw); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); + stackpos -= (int)sizeof(sljit_sw); + + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); +SLJIT_ASSERT(stackpos == STACK(stacktop)); +} + +#define RECURSE_TMP_REG_COUNT 3 + +typedef struct delayed_mem_copy_status { + struct sljit_compiler *compiler; + int store_bases[RECURSE_TMP_REG_COUNT]; + int store_offsets[RECURSE_TMP_REG_COUNT]; + int tmp_regs[RECURSE_TMP_REG_COUNT]; + int saved_tmp_regs[RECURSE_TMP_REG_COUNT]; + int next_tmp_reg; +} delayed_mem_copy_status; + +static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common) +{ +int i; + +for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) + { + SLJIT_ASSERT(status->tmp_regs[i] >= 0); + SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); + + status->store_bases[i] = -1; + } +status->next_tmp_reg = 0; +status->compiler = common->compiler; +} + +static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset, + int store_base, sljit_sw store_offset) +{ +struct sljit_compiler *compiler = status->compiler; +int next_tmp_reg = status->next_tmp_reg; +int tmp_reg = status->tmp_regs[next_tmp_reg]; + +SLJIT_ASSERT(load_base > 0 && store_base > 0); + +if (status->store_bases[next_tmp_reg] == -1) + { + /* Preserve virtual registers. */ + if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0) + OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0); + } +else + OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); + +OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset); +status->store_bases[next_tmp_reg] = store_base; +status->store_offsets[next_tmp_reg] = store_offset; + +status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT; +} + +static void delayed_mem_copy_finish(delayed_mem_copy_status *status) +{ +struct sljit_compiler *compiler = status->compiler; +int next_tmp_reg = status->next_tmp_reg; +int tmp_reg, saved_tmp_reg, i; + +for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) + { + if (status->store_bases[next_tmp_reg] != -1) + { + tmp_reg = status->tmp_regs[next_tmp_reg]; + saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg]; + + OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); + + /* Restore virtual registers. */ + if (sljit_get_register_index(saved_tmp_reg) < 0) + OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0); + } + + next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT; + } +} + +#undef RECURSE_TMP_REG_COUNT + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, + BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) +{ +int length = 1; +int size; +PCRE2_SPTR alternative; +BOOL quit_found = FALSE; +BOOL accept_found = FALSE; +BOOL setsom_found = FALSE; +BOOL setmark_found = FALSE; +BOOL capture_last_found = FALSE; +BOOL control_head_found = FALSE; + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +control_head_found = TRUE; +#endif + +/* Calculate the sum of the private machine words. */ +while (cc < ccend) + { + size = 0; + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + setsom_found = TRUE; + cc += 1; + break; + + case OP_RECURSE: + if (common->has_set_som) + setsom_found = TRUE; + if (common->mark_ptr != 0) + setmark_found = TRUE; + if (common->capture_last_ptr != 0) + capture_last_found = TRUE; + cc += 1 + LINK_SIZE; + break; + + case OP_KET: + if (PRIVATE_DATA(cc) != 0) + { + length++; + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + length++; + SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + length += 2; + if (common->capture_last_ptr != 0) + capture_last_found = TRUE; + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + length++; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + length += 2 + 2; + if (common->capture_last_ptr != 0) + capture_last_found = TRUE; + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. */ + alternative = cc + GET(cc, 1); + if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + length++; + cc += 1 + LINK_SIZE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc) != 0) + length++; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc) != 0) + length += 2; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc) != 0) + length += 2; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc) != 0) + length++; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc) != 0) + length += 2; + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc) != 0) + length += 2; + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#else + size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#endif + if (PRIVATE_DATA(cc) != 0) + length += get_class_iterator_size(cc + size); + cc += size; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if (!setmark_found) + setmark_found = TRUE; + if (common->control_head_ptr != 0) + control_head_found = TRUE; + if (*cc != OP_MARK) + quit_found = TRUE; + + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_SKIP: + case OP_COMMIT: + quit_found = TRUE; + cc++; + break; + + case OP_SKIP_ARG: + quit_found = TRUE; + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + SLJIT_ASSERT(common->control_head_ptr != 0); + quit_found = TRUE; + if (!control_head_found) + control_head_found = TRUE; + cc++; + break; + + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + accept_found = TRUE; + cc++; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + } +SLJIT_ASSERT(cc == ccend); + +if (control_head_found) + length++; +if (capture_last_found) + length++; +if (quit_found) + { + if (setsom_found) + length++; + if (setmark_found) + length++; + } + +*needs_control_head = control_head_found; +*has_quit = quit_found; +*has_accept = accept_found; +return length; +} + +enum copy_recurse_data_types { + recurse_copy_from_global, + recurse_copy_private_to_global, + recurse_copy_shared_to_global, + recurse_copy_kept_shared_to_global, + recurse_swap_global +}; + +static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, + int type, int stackptr, int stacktop, BOOL has_quit) +{ +delayed_mem_copy_status status; +PCRE2_SPTR alternative; +sljit_sw private_srcw[2]; +sljit_sw shared_srcw[3]; +sljit_sw kept_shared_srcw[2]; +int private_count, shared_count, kept_shared_count; +int from_sp, base_reg, offset, i; +BOOL setsom_found = FALSE; +BOOL setmark_found = FALSE; +BOOL capture_last_found = FALSE; +BOOL control_head_found = FALSE; + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +SLJIT_ASSERT(common->control_head_ptr != 0); +control_head_found = TRUE; +#endif + +switch (type) + { + case recurse_copy_from_global: + from_sp = TRUE; + base_reg = STACK_TOP; + break; + + case recurse_copy_private_to_global: + case recurse_copy_shared_to_global: + case recurse_copy_kept_shared_to_global: + from_sp = FALSE; + base_reg = STACK_TOP; + break; + + default: + SLJIT_ASSERT(type == recurse_swap_global); + from_sp = FALSE; + base_reg = TMP2; + break; + } + +stackptr = STACK(stackptr); +stacktop = STACK(stacktop); + +status.tmp_regs[0] = TMP1; +status.saved_tmp_regs[0] = TMP1; + +if (base_reg != TMP2) + { + status.tmp_regs[1] = TMP2; + status.saved_tmp_regs[1] = TMP2; + } +else + { + status.saved_tmp_regs[1] = RETURN_ADDR; + if (HAS_VIRTUAL_REGISTERS) + status.tmp_regs[1] = STR_PTR; + else + status.tmp_regs[1] = RETURN_ADDR; + } + +status.saved_tmp_regs[2] = TMP3; +if (HAS_VIRTUAL_REGISTERS) + status.tmp_regs[2] = STR_END; +else + status.tmp_regs[2] = TMP3; + +delayed_mem_copy_init(&status, common); + +if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr); + } + +stackptr += sizeof(sljit_sw); + +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +if (type != recurse_copy_shared_to_global) + { + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr); + } + +stackptr += sizeof(sljit_sw); +#endif + +while (cc < ccend) + { + private_count = 0; + shared_count = 0; + kept_shared_count = 0; + + switch(*cc) + { + case OP_SET_SOM: + SLJIT_ASSERT(common->has_set_som); + if (has_quit && !setsom_found) + { + kept_shared_srcw[0] = OVECTOR(0); + kept_shared_count = 1; + setsom_found = TRUE; + } + cc += 1; + break; + + case OP_RECURSE: + if (has_quit) + { + if (common->has_set_som && !setsom_found) + { + kept_shared_srcw[0] = OVECTOR(0); + kept_shared_count = 1; + setsom_found = TRUE; + } + if (common->mark_ptr != 0 && !setmark_found) + { + kept_shared_srcw[kept_shared_count] = common->mark_ptr; + kept_shared_count++; + setmark_found = TRUE; + } + } + if (common->capture_last_ptr != 0 && !capture_last_found) + { + shared_srcw[0] = common->capture_last_ptr; + shared_count = 1; + capture_last_found = TRUE; + } + cc += 1 + LINK_SIZE; + break; + + case OP_KET: + if (PRIVATE_DATA(cc) != 0) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); + } + cc += 1 + LINK_SIZE; + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRAPOS: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCOND: + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + cc += 1 + LINK_SIZE; + break; + + case OP_CBRA: + case OP_SCBRA: + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; + shared_srcw[0] = OVECTOR(offset); + shared_srcw[1] = OVECTOR(offset + 1); + shared_count = 2; + + if (common->capture_last_ptr != 0 && !capture_last_found) + { + shared_srcw[2] = common->capture_last_ptr; + shared_count = 3; + capture_last_found = TRUE; + } + + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + { + private_count = 1; + private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + } + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; + shared_srcw[0] = OVECTOR(offset); + shared_srcw[1] = OVECTOR(offset + 1); + shared_count = 2; + + if (common->capture_last_ptr != 0 && !capture_last_found) + { + shared_srcw[2] = common->capture_last_ptr; + shared_count = 3; + capture_last_found = TRUE; + } + + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + cc += 1 + LINK_SIZE + IMM2_SIZE; + break; + + case OP_COND: + /* Might be a hidden SCOND. */ + alternative = cc + GET(cc, 1); + if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + } + cc += 1 + LINK_SIZE; + break; + + CASE_ITERATOR_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + } + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + { + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + } + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + { + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + } + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_1 + if (PRIVATE_DATA(cc)) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + if (PRIVATE_DATA(cc)) + { + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + } + cc += 1; + break; + + CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + if (PRIVATE_DATA(cc)) + { + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + } + cc += 1 + IMM2_SIZE; + break; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#else + i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); +#endif + if (PRIVATE_DATA(cc) != 0) + switch(get_class_iterator_size(cc + i)) + { + case 1: + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); + break; + + case 2: + private_count = 2; + private_srcw[0] = PRIVATE_DATA(cc); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + cc += i; + break; + + case OP_MARK: + case OP_COMMIT_ARG: + case OP_PRUNE_ARG: + case OP_THEN_ARG: + SLJIT_ASSERT(common->mark_ptr != 0); + if (has_quit && !setmark_found) + { + kept_shared_srcw[0] = common->mark_ptr; + kept_shared_count = 1; + setmark_found = TRUE; + } + if (common->control_head_ptr != 0 && !control_head_found) + { + private_srcw[0] = common->control_head_ptr; + private_count = 1; + control_head_found = TRUE; + } + cc += 1 + 2 + cc[1]; + break; + + case OP_THEN: + SLJIT_ASSERT(common->control_head_ptr != 0); + if (!control_head_found) + { + private_srcw[0] = common->control_head_ptr; + private_count = 1; + control_head_found = TRUE; + } + cc++; + break; + + default: + cc = next_opcode(common, cc); + SLJIT_ASSERT(cc != NULL); + break; + } + + if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global); + + for (i = 0; i < private_count; i++) + { + SLJIT_ASSERT(private_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * private_count; + + if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global); + + for (i = 0; i < shared_count; i++) + { + SLJIT_ASSERT(shared_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * shared_count; + + if (type != recurse_copy_private_to_global && type != recurse_swap_global) + { + SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global); + + for (i = 0; i < kept_shared_count; i++) + { + SLJIT_ASSERT(kept_shared_srcw[i] != 0); + + if (!from_sp) + delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]); + + if (from_sp || type == recurse_swap_global) + delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr); + + stackptr += sizeof(sljit_sw); + } + } + else + stackptr += sizeof(sljit_sw) * kept_shared_count; + } + +SLJIT_ASSERT(cc == ccend && stackptr == stacktop); + +delayed_mem_copy_finish(&status); +} + +static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset) +{ +PCRE2_SPTR end = bracketend(cc); +BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; + +/* Assert captures then. */ +if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) + current_offset = NULL; +/* Conditional block does not. */ +if (*cc == OP_COND || *cc == OP_SCOND) + has_alternatives = FALSE; + +cc = next_opcode(common, cc); +if (has_alternatives) + current_offset = common->then_offsets + (cc - common->start); + +while (cc < end) + { + if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) + cc = set_then_offsets(common, cc, current_offset); + else + { + if (*cc == OP_ALT && has_alternatives) + current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) + *current_offset = 1; + cc = next_opcode(common, cc); + } + } + +return end; +} + +#undef CASE_ITERATOR_PRIVATE_DATA_1 +#undef CASE_ITERATOR_PRIVATE_DATA_2A +#undef CASE_ITERATOR_PRIVATE_DATA_2B +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A +#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + +static SLJIT_INLINE BOOL is_powerof2(unsigned int value) +{ +return (value & (value - 1)) == 0; +} + +static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) +{ +while (list) + { + /* sljit_set_label is clever enough to do nothing + if either the jump or the label is NULL. */ + SET_LABEL(list->jump, label); + list = list->next; + } +} + +static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump) +{ +jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); +if (list_item) + { + list_item->next = *list; + list_item->jump = jump; + *list = list_item; + } +} + +static void add_stub(compiler_common *common, struct sljit_jump *start) +{ +DEFINE_COMPILER; +stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); + +if (list_item) + { + list_item->start = start; + list_item->quit = LABEL(); + list_item->next = common->stubs; + common->stubs = list_item; + } +} + +static void flush_stubs(compiler_common *common) +{ +DEFINE_COMPILER; +stub_list *list_item = common->stubs; + +while (list_item) + { + JUMPHERE(list_item->start); + add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); + JUMPTO(SLJIT_JUMP, list_item->quit); + list_item = list_item->next; + } +common->stubs = NULL; +} + +static SLJIT_INLINE void count_match(compiler_common *common) +{ +DEFINE_COMPILER; + +OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); +add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO)); +} + +static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) +{ +/* May destroy all locals and registers except TMP2. */ +DEFINE_COMPILER; + +SLJIT_ASSERT(size > 0); +OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +#ifdef DESTROY_REGISTERS +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); +OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); +OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); +#endif +add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0)); +} + +static SLJIT_INLINE void free_stack(compiler_common *common, int size) +{ +DEFINE_COMPILER; + +SLJIT_ASSERT(size > 0); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +} + +static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) +{ +DEFINE_COMPILER; +sljit_uw *result; + +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + +result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data); +if (SLJIT_UNLIKELY(result == NULL)) + { + sljit_set_compiler_memory_error(compiler); + return NULL; + } + +*(void**)result = common->read_only_data_head; +common->read_only_data_head = (void *)result; +return result + 1; +} + +static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +sljit_s32 i; + +/* At this point we can freely use all temporary registers. */ +SLJIT_ASSERT(length > 1); +/* TMP1 returns with begin - 1. */ +OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); +if (length < 8) + { + for (i = 1; i < length; i++) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0); + } +else + { + if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) + { + GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); + loop = LABEL(); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, loop); + } + else + { + GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); + loop = LABEL(); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0); + OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, loop); + } + } +} + +static SLJIT_INLINE void reset_early_fail(compiler_common *common) +{ +DEFINE_COMPILER; +sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr); +sljit_u32 uncleared_size; +sljit_s32 src = SLJIT_IMM; +sljit_s32 i; +struct sljit_label *loop; + +SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr); + +if (size == sizeof(sljit_sw)) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0); + return; + } + +if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER)) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); + src = TMP3; + } + +if (size <= 6 * sizeof(sljit_sw)) + { + for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0); + return; + } + +GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr); + +uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw); + +OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size); + +loop = LABEL(); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0); +CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop); + +if (uncleared_size >= sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); + +if (uncleared_size >= 2 * sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0); +} + +static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +int i; + +SLJIT_ASSERT(length > 1); +/* OVECTOR(1) contains the "string begin - 1" constant. */ +if (length > 2) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); +if (length < 8) + { + for (i = 2; i < length; i++) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0); + } +else + { + if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) + { + GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); + loop = LABEL(); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, loop); + } + else + { + GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); + loop = LABEL(); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, loop); + } + } + +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack)); +else + OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); + +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); +if (common->control_head_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); +if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end)); +} + +static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg) +{ +while (current != NULL) + { + switch (current[1]) + { + case type_then_trap: + break; + + case type_mark: + if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0) + return current[3]; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]); + current = (sljit_sw*)current[0]; + } +return 0; +} + +static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +BOOL has_pre; + +/* At this point we can freely use all registers. */ +OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); + OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data), + SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); + } +else + { + OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data)); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount)); + OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0); + OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); + } + +has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; + +GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0)); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + +loop = LABEL(); + +if (has_pre) + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); +else + { + OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0); + OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw)); + } + +OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE)); +OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0); +/* Copy the integer value to the output buffer */ +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + +SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8); +OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0); + +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); +JUMPTO(SLJIT_NOT_ZERO, loop); + +/* Calculate the return value, which is the maximum ovector value. */ +if (topbracket > 1) + { + if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS) + { + GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); + + /* OVECTOR(0) is never equal to SLJIT_S2. */ + loop = LABEL(); + sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); + OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); + } + else + { + GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); + + /* OVECTOR(0) is never equal to SLJIT_S2. */ + loop = LABEL(); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0); + OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); + } + } +else + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); +} + +static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) +{ +DEFINE_COMPILER; +sljit_s32 mov_opcode; +sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1; + +SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0); +SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 + && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0)); + +if (arguments_reg != ARGUMENTS) + OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), + common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr); +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL); + +/* Store match begin and end. */ +OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); +OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data)); + +mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; + +OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0); + +OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif +OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0); + +JUMPTO(SLJIT_JUMP, quit); +} + +static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + { + /* The value of -1 must be kept for start_used_ptr! */ + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1); + /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting + is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */ + jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +} + +static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc) +{ +/* Detects if the character has an othercase. */ +unsigned int c; + +#ifdef SUPPORT_UNICODE +if (common->utf || common->ucp) + { + if (common->utf) + { + GETCHAR(c, cc); + } + else + c = *cc; + + if (c > 127) + return c != UCD_OTHERCASE(c); + + return common->fcc[c] != c; + } +else +#endif + c = *cc; +return MAX_255(c) ? common->fcc[c] != c : FALSE; +} + +static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) +{ +/* Returns with the othercase. */ +#ifdef SUPPORT_UNICODE +if ((common->utf || common->ucp) && c > 127) + return UCD_OTHERCASE(c); +#endif +return TABLE_GET(c, common->fcc, c); +} + +static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc) +{ +/* Detects if the character and its othercase has only 1 bit difference. */ +unsigned int c, oc, bit; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +int n; +#endif + +#ifdef SUPPORT_UNICODE +if (common->utf || common->ucp) + { + if (common->utf) + { + GETCHAR(c, cc); + } + else + c = *cc; + + if (c <= 127) + oc = common->fcc[c]; + else + oc = UCD_OTHERCASE(c); + } +else + { + c = *cc; + oc = TABLE_GET(c, common->fcc, c); + } +#else +c = *cc; +oc = TABLE_GET(c, common->fcc, c); +#endif + +SLJIT_ASSERT(c != oc); + +bit = c ^ oc; +/* Optimized for English alphabet. */ +if (c <= 127 && bit == 0x20) + return (0 << 8) | 0x20; + +/* Since c != oc, they must have at least 1 bit difference. */ +if (!is_powerof2(bit)) + return 0; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + +#ifdef SUPPORT_UNICODE +if (common->utf && c > 127) + { + n = GET_EXTRALEN(*cc); + while ((bit & 0x3f) == 0) + { + n--; + bit >>= 6; + } + return (n << 8) | bit; + } +#endif /* SUPPORT_UNICODE */ +return (0 << 8) | bit; + +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + +#ifdef SUPPORT_UNICODE +if (common->utf && c > 65535) + { + if (bit >= (1u << 10)) + bit >>= 10; + else + return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); + } +#endif /* SUPPORT_UNICODE */ +return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8)); + +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +} + +static void check_partial(compiler_common *common, BOOL force) +{ +/* Checks whether a partial matching is occurred. Does not modify registers. */ +DEFINE_COMPILER; +struct sljit_jump *jump = NULL; + +SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE); + +if (common->mode == PCRE2_JIT_COMPLETE) + return; + +if (!force && !common->allow_empty_partial) + jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); +else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); + +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } + +if (jump != NULL) + JUMPHERE(jump); +} + +static void check_str_end(compiler_common *common, jump_list **end_reached) +{ +/* Does not affect registers. Usually used in a tight spot. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == PCRE2_JIT_COMPLETE) + { + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + return; + } + +jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + { + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + add_jump(compiler, end_reached, JUMP(SLJIT_JUMP)); + } +else + { + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); +} + +static void detect_partial_match(compiler_common *common, jump_list **backtracks) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (common->mode == PCRE2_JIT_COMPLETE) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + return; + } + +/* Partial matching mode. */ +jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); +if (!common->allow_empty_partial) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1)); + +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + } +else + { + if (common->partialmatchlabel != NULL) + JUMPTO(SLJIT_JUMP, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); + } +JUMPHERE(jump); +} + +static void process_partial_match(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +/* Partial matching mode. */ +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + { + jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + JUMPHERE(jump); + } +else if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + if (common->partialmatchlabel != NULL) + CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); + } +} + +static void detect_partial_match_to(compiler_common *common, struct sljit_label *label) +{ +DEFINE_COMPILER; + +CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label); +process_partial_match(common); +} + +static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks) +{ +/* Reads the character into TMP1, keeps STR_PTR. +Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */ +DEFINE_COMPILER; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ + +SLJIT_UNUSED_ARG(max); +SLJIT_UNUSED_ARG(dst); +SLJIT_UNUSED_ARG(dstw); +SLJIT_UNUSED_ARG(backtracks); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + if (max < 128) return; + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw); + if (backtracks && common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + if (max < 0xd800) return; + + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + + if (common->invalid_utf) + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw); + if (backtracks && common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + } + else + { + /* TMP2 contains the high surrogate. */ + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + } + + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf) + { + if (max < 0xd800) return; + + if (backtracks != NULL) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + } + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ +} + +static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks) +{ +/* Reads one character back without moving STR_PTR. TMP2 must +contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */ +DEFINE_COMPILER; + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ + +SLJIT_UNUSED_ARG(max); +SLJIT_UNUSED_ARG(backtracks); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + if (max < 128) return; + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + if (common->invalid_utf) + { + add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + } + else + add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL)); + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + if (max < 0xd800) return; + + if (common->invalid_utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00); + /* TMP2 contains the low surrogate. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + } + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ +} + +#define READ_CHAR_UPDATE_STR_PTR 0x1 +#define READ_CHAR_UTF8_NEWLINE 0x2 +#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE) +#define READ_CHAR_VALID_UTF 0x4 + +static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max, + jump_list **backtracks, sljit_u32 options) +{ +/* Reads the precise value of a character into TMP1, if the character is +between min and max (c >= min && c <= max). Otherwise it returns with a value +outside the range. Does not check STR_END. */ +DEFINE_COMPILER; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_jump *jump2; +#endif + +SLJIT_UNUSED_ARG(min); +SLJIT_UNUSED_ARG(max); +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(options); +SLJIT_ASSERT(min <= max); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return; + + if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF)) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + + if (options & READ_CHAR_UTF8_NEWLINE) + add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL)); + else + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + JUMPHERE(jump); + return; + } + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + if (min >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (min >= 0x800 && max <= 0xffff) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (max >= 0x800) + { + add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); + } + else if (max < 128) + { + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + } + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (!(options & READ_CHAR_UPDATE_STR_PTR)) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + else + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return; + + if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF)) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + + if (options & READ_CHAR_UTF8_NEWLINE) + add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL)); + else + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + JUMPHERE(jump); + return; + } + + if (max >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800); + /* TMP2 contains the high surrogate. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump); + return; + } + + /* Skip low surrogate if necessary. */ + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) + { + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + if (options & READ_CHAR_UPDATE_STR_PTR) + CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + if (max >= 0xd800) + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000); + } + else + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); + if (options & READ_CHAR_UPDATE_STR_PTR) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (max >= 0xd800) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); + JUMPHERE(jump); + } + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf) + { + if (backtracks != NULL) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + } + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ +} + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + +static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) +{ +/* Tells whether the character codes below 128 are enough +to determine a match. */ +const sljit_u8 value = nclass ? 0xff : 0; +const sljit_u8 *end = bitset + 32; + +bitset += 16; +do + { + if (*bitset++ != value) + return FALSE; + } +while (bitset < end); +return TRUE; +} + +static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated) +{ +/* Reads the precise character type of a character into TMP1, if the character +is less than 128. Otherwise it returns with zero. Does not check STR_END. The +full_read argument tells whether characters above max are accepted or not. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +SLJIT_ASSERT(common->utf); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +/* All values > 127 are zero in ctypes. */ +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + +if (negated) + { + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80); + + if (common->invalid_utf) + { + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + } + else + { + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + } + JUMPHERE(jump); + } +} + +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + +static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated) +{ +/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ +DEFINE_COMPILER; +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 +struct sljit_jump *jump; +#endif +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_jump *jump2; +#endif + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(negated); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { + /* The result of this read may be unused, but saves an "else" part. */ + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80); + + if (!negated) + { + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2)); + + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40)); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump2); + } + else if (common->invalid_utf) + { + add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump2); + } + else + add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); + + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf && negated) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000)); +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */ + +#if PCRE2_CODE_UNIT_WIDTH != 8 +/* The ctypes array contains only 256 values. */ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf && negated) + { + /* Skip low surrogate if necessary. */ + if (!common->invalid_utf) + { + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) + { + OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + } + else + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } + return; + } + + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400)); + + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */ +} + +static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid) +{ +/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE, +TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, +and it is destroyed. Does not modify STR_PTR for invalid character sequences. */ +DEFINE_COMPILER; + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(must_be_valid); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_jump *jump; +#endif + +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +struct sljit_label *label; + +if (common->utf) + { + if (!must_be_valid && common->invalid_utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80); + add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); + JUMPHERE(jump); + return; + } + + label = LABEL(); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); + return; + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + if (!must_be_valid && common->invalid_utf) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800); + add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL)); + if (backtracks != NULL) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); + JUMPHERE(jump); + return; + } + + /* Skip low surrogate if necessary. */ + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + return; + } +#elif PCRE2_CODE_UNIT_WIDTH == 32 +if (common->invalid_utf && !must_be_valid) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + if (backtracks != NULL) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return; + } + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + return; + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ +#endif /* SUPPORT_UNICODE */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +} + +static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) +{ +/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +if (nltype == NLTYPE_ANY) + { + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + } +else if (nltype == NLTYPE_ANYCRLF) + { + if (jumpifmatch) + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + JUMPHERE(jump); + } + } +else + { + SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); + add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } +} + +#ifdef SUPPORT_UNICODE + +#if PCRE2_CODE_UNIT_WIDTH == 8 +static void do_utfreadchar(compiler_common *common) +{ +/* Fast decoding a UTF-8 character. TMP1 contains the first byte +of the character (>= 0xc0). Return char value in TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Searching for the first zero. */ +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +jump = JUMP(SLJIT_NOT_ZERO); +/* Three byte sequence. */ +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Four byte sequence. */ +JUMPHERE(jump); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadtype8(compiler_common *common) +{ +/* Fast decoding a UTF-8 character type. TMP2 contains the first byte +of the character (>= 0xc0). Return value in TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_jump *compare; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); +/* The upper 5 bits are known at this point. */ +compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(compare); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* We only have types for characters less than 256. */ +JUMPHERE(jump); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadchar_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-8 character. TMP1 contains the first byte +of the character (>= 0xc0). Return char value in TMP1. STR_PTR is +undefined for invalid characters. */ +DEFINE_COMPILER; +sljit_s32 i; +sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); +struct sljit_jump *jump; +struct sljit_jump *buffer_end_close; +struct sljit_label *three_byte_entry; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[11]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2); + +/* Usually more than 3 characters remained in the subject buffer. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); + +/* Not a valid start of a multi-byte sequence, no more bytes read. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2); + +buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); + +/* Three-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000); + exit_invalid[2] = NULL; + } +else + exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +jump = JUMP(SLJIT_NOT_ZERO); + +three_byte_entry = LABEL(); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800); + exit_invalid[3] = NULL; + } +else + exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + exit_invalid[4] = NULL; + } +else + exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); + +/* Four-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0); + exit_invalid[5] = NULL; + } +else + exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + exit_invalid[6] = NULL; + } +else + exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(buffer_end_close); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three-byte sequence. */ +JUMPHERE(jump); +exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + exit_invalid[10] = NULL; + } +else + exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + +/* One will be substracted from STR_PTR later. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +/* Four byte sequences are not possible. */ +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry); + +exit_invalid_label = LABEL(); +for (i = 0; i < 11; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadnewline_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-8 character, specialized for newlines. +TMP1 contains the first byte of the character (>= 0xc0). Return +char value in TMP1. */ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_label *skip_start; +struct sljit_label *three_byte_exit; +struct sljit_jump *jump[5]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +if (common->nltype != NLTYPE_ANY) + { + SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128); + + /* All newlines are ascii, just skip intermediate octets. */ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + loop = LABEL(); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + JUMPHERE(jump[0]); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); + OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + return; + } + +jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2); +jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2); + +skip_start = LABEL(); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); +jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80); + +/* Skip intermediate octets. */ +loop = LABEL(); +jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); + +JUMPHERE(jump[3]); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +three_byte_exit = LABEL(); +JUMPHERE(jump[0]); +JUMPHERE(jump[4]); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Two byte long newline: 0x85. */ +JUMPHERE(jump[1]); +CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three byte long newlines: 0x2028 and 0x2029. */ +JUMPHERE(jump[2]); +CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start); +CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80); +CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfmoveback_invalid(compiler_common *common) +{ +/* Goes one character back. */ +DEFINE_COMPILER; +sljit_s32 i; +struct sljit_jump *jump; +struct sljit_jump *buffer_start_close; +struct sljit_label *exit_ok_label; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[7]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); + +/* Two-byte sequence. */ +buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Three-byte sequence. */ +JUMPHERE(jump); +exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Four-byte sequence. */ +JUMPHERE(jump); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0); +exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05); + +exit_ok_label = LABEL(); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +/* Two-byte sequence. */ +JUMPHERE(buffer_start_close); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label); + +/* Three-byte sequence. */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40); +exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label); + +/* Four-byte sequences are not possible. */ + +exit_invalid_label = LABEL(); +sljit_set_label(exit_invalid[5], exit_invalid_label); +sljit_set_label(exit_invalid[6], exit_invalid_label); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[4]); +/* -2 + 4 = 2 */ +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + +exit_invalid_label = LABEL(); +for (i = 0; i < 4; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4)); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfpeakcharback(compiler_common *common) +{ +/* Peak a character back. Does not modify STR_PTR. */ +DEFINE_COMPILER; +struct sljit_jump *jump[2]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); +jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0); +jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump[1]); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump[0]); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfpeakcharback_invalid(compiler_common *common) +{ +/* Peak a character back. Does not modify STR_PTR. */ +DEFINE_COMPILER; +sljit_s32 i; +sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); +struct sljit_jump *jump[2]; +struct sljit_label *two_byte_entry; +struct sljit_label *three_byte_entry; +struct sljit_label *exit_invalid_label; +struct sljit_jump *exit_invalid[8]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e); + +two_byte_entry = LABEL(); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[1]); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Three-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0); +jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10); + +three_byte_entry = LABEL(); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800); + exit_invalid[2] = NULL; + } +else + exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + exit_invalid[3] = NULL; + } +else + exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[1]); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80); +exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Four-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18); +/* ADD is used instead of OR because of the SUB 0x10000 above. */ +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + +if (has_cmov) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); + CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + exit_invalid[5] = NULL; + } +else + exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[0]); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); +exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* Three-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump[0]); +exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0); + +/* Two-byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2); +CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry); + +exit_invalid_label = LABEL(); +for (i = 0; i < 8; i++) + sljit_set_label(exit_invalid[i], exit_invalid_label); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + +#if PCRE2_CODE_UNIT_WIDTH == 16 + +static void do_utfreadchar_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-16 character. TMP1 contains the first half +of the character (>= 0xd800). Return char value in TMP1. STR_PTR is +undefined for invalid characters. */ +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +/* TMP2 contains the high surrogate. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfreadnewline_invalid(compiler_common *common) +{ +/* Slow decoding a UTF-16 character, specialized for newlines. +TMP1 contains the first half of the character (>= 0xd800). Return +char value in TMP1. */ + +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[2]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +/* TMP2 contains the high surrogate. */ +exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); + +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); +OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfmoveback_invalid(compiler_common *common) +{ +/* Goes one character back. */ +DEFINE_COMPILER; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_utfpeakcharback_invalid(compiler_common *common) +{ +/* Peak a character back. Does not modify STR_PTR. */ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_jump *exit_invalid[3]; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); +exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00); +OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); +exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + +JUMPHERE(jump); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(exit_invalid[0]); +JUMPHERE(exit_invalid[1]); +JUMPHERE(exit_invalid[2]); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */ + +/* UCD_BLOCK_SIZE must be 128 (see the assert below). */ +#define UCD_BLOCK_MASK 127 +#define UCD_BLOCK_SHIFT 7 + +static void do_getucd(compiler_common *common) +{ +/* Search the UCD record for the character comes in TMP1. +Returns chartype in TMP1 and UCD offset in TMP2. */ +DEFINE_COMPILER; +#if PCRE2_CODE_UNIT_WIDTH == 32 +struct sljit_jump *jump; +#endif + +#if defined SLJIT_DEBUG && SLJIT_DEBUG +/* dummy_ucd_record */ +const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR); +SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther); +SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); +#endif + +SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif + +OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_getucdtype(compiler_common *common) +{ +/* Search the UCD record for the character comes in TMP1. +Returns chartype in TMP1 and UCD offset in TMP2. */ +DEFINE_COMPILER; +#if PCRE2_CODE_UNIT_WIDTH == 32 +struct sljit_jump *jump; +#endif + +#if defined SLJIT_DEBUG && SLJIT_DEBUG +/* dummy_ucd_record */ +const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR); +SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther); +SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); +#endif + +SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif + +OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + +/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +#endif /* SUPPORT_UNICODE */ + +static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *mainloop; +struct sljit_label *newlinelabel = NULL; +struct sljit_jump *start; +struct sljit_jump *end = NULL; +struct sljit_jump *end2 = NULL; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *loop; +struct sljit_jump *jump; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +jump_list *newline = NULL; +sljit_u32 overall_options = common->re->overall_options; +BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0; +BOOL newlinecheck = FALSE; +BOOL readuchar = FALSE; + +if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0) + && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) + newlinecheck = TRUE; + +SLJIT_ASSERT(common->abort_label == NULL); + +if ((overall_options & PCRE2_FIRSTLINE) != 0) + { + /* Search for the end of the first line. */ + SLJIT_ASSERT(common->match_end_ptr != 0); + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + mainloop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); + JUMPHERE(end); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + { + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + mainloop = LABEL(); + /* Continual stores does not cause data dependency. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); + read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); + check_newlinechar(common, common->nltype, &newline, TRUE); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); + set_jumps(newline, LABEL()); + } + + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + } +else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0) + { + /* Check whether offset limit is set and valid. */ + SLJIT_ASSERT(common->match_end_ptr != 0); + + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); + JUMPHERE(end2); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0)); + JUMPHERE(end); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0); + } + +start = JUMP(SLJIT_JUMP); + +if (newlinecheck) + { + newlinelabel = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + end2 = JUMP(SLJIT_JUMP); + } + +mainloop = LABEL(); + +/* Increasing the STR_PTR here requires one less jump in the most common case. */ +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && !common->invalid_utf) readuchar = TRUE; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +if (newlinecheck) readuchar = TRUE; + +if (readuchar) + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + +if (newlinecheck) + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->invalid_utf) + { + /* Skip continuation code units. */ + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } +else if (common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(jump); + } +#elif PCRE2_CODE_UNIT_WIDTH == 16 +if (common->invalid_utf) + { + /* Skip continuation code units. */ + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } +else if (common->utf) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + CMOV(SLJIT_LESS, STR_PTR, TMP2, 0); + } + else + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + } + } +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */ +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ +JUMPHERE(start); + +if (newlinecheck) + { + JUMPHERE(end); + JUMPHERE(end2); + } + +return mainloop; +} + + +static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last) +{ +sljit_u32 i, count = chars->count; + +if (count == 255) + return; + +if (count == 0) + { + chars->count = 1; + chars->chars[0] = chr; + + if (last) + chars->last_count = 1; + return; + } + +for (i = 0; i < count; i++) + if (chars->chars[i] == chr) + return; + +if (count >= MAX_DIFF_CHARS) + { + chars->count = 255; + return; + } + +chars->chars[count] = chr; +chars->count = count + 1; + +if (last) + chars->last_count++; +} + +static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count) +{ +/* Recursive function, which scans prefix literals. */ +BOOL last, any, class, caseless; +int len, repeat, len_save, consumed = 0; +sljit_u32 chr; /* Any unicode character. */ +sljit_u8 *bytes, *bytes_end, byte; +PCRE2_SPTR alternative, cc_save, oc; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_UCHAR othercase[4]; +#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 +PCRE2_UCHAR othercase[2]; +#else +PCRE2_UCHAR othercase[1]; +#endif + +repeat = 1; +while (TRUE) + { + if (*rec_count == 0) + return 0; + (*rec_count)--; + + last = TRUE; + any = FALSE; + class = FALSE; + caseless = FALSE; + + switch (*cc) + { + case OP_CHARI: + caseless = TRUE; + /* Fall through */ + case OP_CHAR: + last = FALSE; + cc++; + break; + + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + cc = bracketend(cc); + continue; + + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + caseless = TRUE; + /* Fall through */ + case OP_PLUS: + case OP_MINPLUS: + case OP_POSPLUS: + cc++; + break; + + case OP_EXACTI: + caseless = TRUE; + /* Fall through */ + case OP_EXACT: + repeat = GET2(cc, 1); + last = FALSE; + cc += 1 + IMM2_SIZE; + break; + + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + caseless = TRUE; + /* Fall through */ + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + len = 1; + cc++; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); +#endif + max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + last = FALSE; + break; + + case OP_KET: + cc += 1 + LINK_SIZE; + continue; + + case OP_ALT: + cc += GET(cc, 1); + continue; + + case OP_ONCE: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + alternative = cc + GET(cc, 1); + while (*alternative == OP_ALT) + { + max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + alternative += GET(alternative, 1); + } + + if (*cc == OP_CBRA || *cc == OP_CBRAPOS) + cc += IMM2_SIZE; + cc += 1 + LINK_SIZE; + continue; + + case OP_CLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) + return consumed; +#endif + class = TRUE; + break; + + case OP_NCLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + class = TRUE; + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += GET(cc, 1); + break; +#endif + + case OP_DIGIT: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WHITESPACE: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WORDCHAR: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_NOT: + case OP_NOTI: + cc++; + /* Fall through. */ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_ANY: + case OP_ALLANY: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc++; + break; + +#ifdef SUPPORT_UNICODE + case OP_NOTPROP: + case OP_PROP: +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += 1 + 2; + break; +#endif + + case OP_TYPEEXACT: + repeat = GET2(cc, 1); + cc += 1 + IMM2_SIZE; + continue; + + case OP_NOTEXACT: + case OP_NOTEXACTI: +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) return consumed; +#endif + any = TRUE; + repeat = GET2(cc, 1); + cc += 1 + IMM2_SIZE + 1; + break; + + default: + return consumed; + } + + if (any) + { + do + { + chars->count = 255; + + consumed++; + if (--max_chars == 0) + return consumed; + chars++; + } + while (--repeat > 0); + + repeat = 1; + continue; + } + + if (class) + { + bytes = (sljit_u8*) (cc + 1); + cc += 1 + 32 / sizeof(PCRE2_UCHAR); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + break; + + default: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + repeat = GET2(cc, 1); + if (repeat <= 0) + return consumed; + break; + } + + do + { + if (bytes[31] & 0x80) + chars->count = 255; + else if (chars->count != 255) + { + bytes_end = bytes + 32; + chr = 0; + do + { + byte = *bytes++; + SLJIT_ASSERT((chr & 0x7) == 0); + if (byte == 0) + chr += 8; + else + { + do + { + if ((byte & 0x1) != 0) + add_prefix_char(chr, chars, TRUE); + byte >>= 1; + chr++; + } + while (byte != 0); + chr = (chr + 7) & ~7; + } + } + while (chars->count != 255 && bytes < bytes_end); + bytes = bytes_end - 32; + } + + consumed++; + if (--max_chars == 0) + return consumed; + chars++; + } + while (--repeat > 0); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + return consumed; + + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) + return consumed; + cc += 1 + 2 * IMM2_SIZE; + break; + } + + repeat = 1; + continue; + } + + len = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); +#endif + + if (caseless && char_has_othercase(common, cc)) + { +#ifdef SUPPORT_UNICODE + if (common->utf) + { + GETCHAR(chr, cc); + if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) + return consumed; + } + else +#endif + { + chr = *cc; +#ifdef SUPPORT_UNICODE + if (common->ucp && chr > 127) + othercase[0] = UCD_OTHERCASE(chr); + else +#endif + othercase[0] = TABLE_GET(chr, common->fcc, chr); + } + } + else + { + caseless = FALSE; + othercase[0] = 0; /* Stops compiler warning - PH */ + } + + len_save = len; + cc_save = cc; + while (TRUE) + { + oc = othercase; + do + { + len--; + consumed++; + + chr = *cc; + add_prefix_char(*cc, chars, len == 0); + + if (caseless) + add_prefix_char(*oc, chars, len == 0); + + if (--max_chars == 0) + return consumed; + chars++; + cc++; + oc++; + } + while (len > 0); + + if (--repeat == 0) + break; + + len = len_save; + cc = cc_save; + } + + repeat = 1; + if (last) + return consumed; + } +} + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); +CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); +CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label); +#else +#error "Unknown code width" +#endif +} +#endif + +#include "pcre2_jit_simd_inc.h" + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD + +static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max) +{ + sljit_s32 i, j, max_i = 0, max_j = 0; + sljit_u32 max_pri = 0; + PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri; + + for (i = max - 1; i >= 1; i--) + { + if (chars[i].last_count > 2) + { + a1 = chars[i].chars[0]; + a2 = chars[i].chars[1]; + a_pri = chars[i].last_count; + + j = i - max_fast_forward_char_pair_offset(); + if (j < 0) + j = 0; + + while (j < i) + { + b_pri = chars[j].last_count; + if (b_pri > 2 && a_pri + b_pri >= max_pri) + { + b1 = chars[j].chars[0]; + b2 = chars[j].chars[1]; + + if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2) + { + max_pri = a_pri + b_pri; + max_i = i; + max_j = j; + } + } + j++; + } + } + } + +if (max_pri == 0) + return FALSE; + +fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); +return TRUE; +} + +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ + +static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *match; +struct sljit_jump *partial_quit; +PCRE2_UCHAR mask; +BOOL has_match_end = (common->match_end_ptr != 0); + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0); + +if (has_match_end) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + +if (offset > 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + +if (has_match_end) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + } + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD + +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD) + { + fast_forward_char_simd(common, char1, char2, offset); + + if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + + if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; + } + +#endif + +start = LABEL(); + +partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (char1 == char2) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start); +else + { + mask = char1 ^ char2; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start); + JUMPHERE(match); + } + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1))); + jumpto_if_not_utf_char_start(compiler, TMP1, start); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); + +if (common->mode != PCRE2_JIT_COMPLETE) + JUMPHERE(partial_quit); + +if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *match; +fast_forward_char_data chars[MAX_N_CHARS]; +sljit_s32 offset; +PCRE2_UCHAR mask; +PCRE2_UCHAR *char_set, *char_set_end; +int i, max, from; +int range_right = -1, range_len; +sljit_u8 *update_table = NULL; +BOOL in_range; +sljit_u32 rec_count; + +for (i = 0; i < MAX_N_CHARS; i++) + { + chars[i].count = 0; + chars[i].last_count = 0; + } + +rec_count = 10000; +max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); + +if (max < 1) + return FALSE; + +/* Convert last_count to priority. */ +for (i = 0; i < max; i++) + { + SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count); + + if (chars[i].count == 1) + { + chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5; + /* Simplifies algorithms later. */ + chars[i].chars[1] = chars[i].chars[0]; + } + else if (chars[i].count == 2) + { + SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]); + + if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1])) + chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4; + else + chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2; + } + else + chars[i].last_count = (chars[i].count == 255) ? 0 : 1; + } + +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max)) + return TRUE; +#endif + +in_range = FALSE; +/* Prevent compiler "uninitialized" warning */ +from = 0; +range_len = 4 /* minimum length */ - 1; +for (i = 0; i <= max; i++) + { + if (in_range && (i - from) > range_len && (chars[i - 1].count < 255)) + { + range_len = i - from; + range_right = i - 1; + } + + if (i < max && chars[i].count < 255) + { + SLJIT_ASSERT(chars[i].count > 0); + if (!in_range) + { + in_range = TRUE; + from = i; + } + } + else + in_range = FALSE; + } + +if (range_right >= 0) + { + update_table = (sljit_u8 *)allocate_read_only_data(common, 256); + if (update_table == NULL) + return TRUE; + memset(update_table, IN_UCHARS(range_len), 256); + + for (i = 0; i < range_len; i++) + { + SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255); + + char_set = chars[range_right - i].chars; + char_set_end = char_set + chars[range_right - i].count; + do + { + if (update_table[(*char_set) & 0xff] > IN_UCHARS(i)) + update_table[(*char_set) & 0xff] = IN_UCHARS(i); + char_set++; + } + while (char_set < char_set_end); + } + } + +offset = -1; +/* Scan forward. */ +for (i = 0; i < max; i++) + { + if (range_right == i) + continue; + + if (offset == -1) + { + if (chars[i].last_count >= 2) + offset = i; + } + else if (chars[offset].last_count < chars[i].last_count) + offset = i; + } + +SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2)); + +if (range_right < 0) + { + if (offset < 0) + return FALSE; + /* Works regardless the value is 1 or 2. */ + fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset); + return TRUE; + } + +SLJIT_ASSERT(range_right != offset); + +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + } +else + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + } + +SLJIT_ASSERT(range_right >= 0); + +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); + +start = LABEL(); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + +#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); +#else +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); +#endif + +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +else + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); + +if (offset >= 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + if (chars[offset].count == 1) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start); + else + { + mask = chars[offset].chars[0] ^ chars[offset].chars[1]; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start); + JUMPHERE(match); + } + } + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset != 0) + { + if (offset < 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + + jumpto_if_not_utf_char_start(compiler, TMP1, start); + + if (offset < 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + +if (offset >= 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +else + OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +return TRUE; +} + +static SLJIT_INLINE void fast_forward_first_char(compiler_common *common) +{ +PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit); +PCRE2_UCHAR oc; + +oc = first_char; +if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0) + { + oc = TABLE_GET(first_char, common->fcc, first_char); +#if defined SUPPORT_UNICODE + if (first_char > 127 && (common->utf || common->ucp)) + oc = UCD_OTHERCASE(first_char); +#endif + } + +fast_forward_first_char2(common, first_char, oc, 0); +} + +static SLJIT_INLINE void fast_forward_newline(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_jump *lastchar; +struct sljit_jump *firstchar; +struct sljit_jump *quit; +struct sljit_jump *foundcr = NULL; +struct sljit_jump *notfoundnl; +jump_list *newline = NULL; + +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + } + +if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + + loop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + + JUMPHERE(quit); + JUMPHERE(firstchar); + JUMPHERE(lastchar); + + if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; + } + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + +/* Example: match /^/ to \r\n from offset 1. */ +firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); +move_back(common, NULL, FALSE); + +loop = LABEL(); +common->ff_newline_shortcut = loop; + +read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); +lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); +check_newlinechar(common, common->nltype, &newline, FALSE); +set_jumps(newline, loop); + +if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + { + quit = JUMP(SLJIT_JUMP); + JUMPHERE(foundcr); + notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(notfoundnl); + JUMPHERE(quit); + } +JUMPHERE(lastchar); +JUMPHERE(firstchar); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); + +static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common) +{ +DEFINE_COMPILER; +const sljit_u8 *start_bits = common->re->start_bitmap; +struct sljit_label *start; +struct sljit_jump *partial_quit; +#if PCRE2_CODE_UNIT_WIDTH != 8 +struct sljit_jump *found = NULL; +#endif +jump_list *matches = NULL; + +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + } + +start = LABEL(); + +partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches)) + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if ((start_bits[31] & 0x80) != 0) + found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255); + else + CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start); +#elif defined SUPPORT_UNICODE + if (common->utf && is_char7_bitset(start_bits, FALSE)) + CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start); +#endif + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); + if (!HAS_VIRTUAL_REGISTERS) + { + OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0); + } + else + { + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + } + JUMPTO(SLJIT_ZERO, start); + } +else + set_jumps(matches, start); + +#if PCRE2_CODE_UNIT_WIDTH != 8 +if (found != NULL) + JUMPHERE(found); +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (common->mode != PCRE2_JIT_COMPLETE) + JUMPHERE(partial_quit); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); +} + +static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) +{ +DEFINE_COMPILER; +struct sljit_label *loop; +struct sljit_jump *toolong; +struct sljit_jump *already_found; +struct sljit_jump *found; +struct sljit_jump *found_oc = NULL; +jump_list *not_found = NULL; +sljit_u32 oc, bit; + +SLJIT_ASSERT(common->req_char_ptr != 0); +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); +toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0); +already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0); + +if (has_firstchar) + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +else + OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); + +oc = req_char; +if (caseless) + { + oc = TABLE_GET(req_char, common->fcc, req_char); +#if defined SUPPORT_UNICODE + if (req_char > 127 && (common->utf || common->ucp)) + oc = UCD_OTHERCASE(req_char); +#endif + } + +#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD +if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD) + { + not_found = fast_requested_char_simd(common, req_char, oc); + } +else +#endif + { + loop = LABEL(); + add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); + + if (req_char == oc) + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + else + { + bit = req_char ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); + } + else + { + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + } + } + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_JUMP, loop); + + JUMPHERE(found); + if (found_oc) + JUMPHERE(found_oc); + } + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); + +JUMPHERE(already_found); +JUMPHERE(toolong); +return not_found; +} + +static void do_revertframes(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *mainloop; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +GET_LOCAL_BASE(TMP1, 0, 0); + +/* Drop frames until we reach STACK_TOP. */ +mainloop = LABEL(); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw)); +jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0); + +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + } +else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); + GET_LOCAL_BASE(TMP1, 0, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0); + } +JUMPTO(SLJIT_JUMP, mainloop); + +JUMPHERE(jump); +jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0); +/* End of reverting values. */ +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); + +JUMPHERE(jump); +OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + } +else + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0); + } +JUMPTO(SLJIT_JUMP, mainloop); +} + +static void check_wordboundary(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *skipread; +jump_list *skipread_list = NULL; +#ifdef SUPPORT_UNICODE +struct sljit_label *valid_utf; +jump_list *invalid_utf1 = NULL; +#endif /* SUPPORT_UNICODE */ +jump_list *invalid_utf2 = NULL; +#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE +struct sljit_jump *jump; +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ + +SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); + +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +/* Get type of the previous char, and put it to TMP3. */ +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); +skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) + { + peek_char_back(common, READ_CHAR_MAX, &invalid_utf1); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + } + } +else +#endif /* SUPPORT_UNICODE */ + { + if (common->mode == PCRE2_JIT_COMPLETE) + peek_char_back(common, READ_CHAR_MAX, NULL); + else + { + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR); + } + } + +/* Testing char type. */ +#ifdef SUPPORT_UNICODE +if (common->ucp) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); + add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP3, 0, TMP2, 0); + } +else +#endif /* SUPPORT_UNICODE */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UNICODE + /* Here TMP3 has already been zeroed. */ + jump = NULL; + if (common->utf) + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); + OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1); +#if PCRE2_CODE_UNIT_WIDTH != 8 + JUMPHERE(jump); +#elif defined SUPPORT_UNICODE + if (jump != NULL) + JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + } +JUMPHERE(skipread); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); +check_str_end(common, &skipread_list); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); + +/* Testing char type. This is a code duplication. */ +#ifdef SUPPORT_UNICODE + +valid_utf = LABEL(); + +if (common->ucp) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); + add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + JUMPHERE(jump); + } +else +#endif /* SUPPORT_UNICODE */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + /* TMP2 may be destroyed by peek_char. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UNICODE + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + jump = NULL; + if (common->utf) + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); +#endif + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +#if PCRE2_CODE_UNIT_WIDTH != 8 + JUMPHERE(jump); +#elif defined SUPPORT_UNICODE + if (jump != NULL) + JUMPHERE(jump); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + } +set_jumps(skipread_list, LABEL()); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) + { + set_jumps(invalid_utf1, LABEL()); + + peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + + set_jumps(invalid_utf2, LABEL()); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + } +#endif /* SUPPORT_UNICODE */ +} + +static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +int ranges[MAX_CLASS_RANGE_SIZE]; +sljit_u8 bit, cbit, all; +int i, byte, length = 0; + +bit = bits[0] & 0x1; +/* All bits will be zero or one (since bit is zero or one). */ +all = -bit; + +for (i = 0; i < 256; ) + { + byte = i >> 3; + if ((i & 0x7) == 0 && bits[byte] == all) + i += 8; + else + { + cbit = (bits[byte] >> (i & 0x7)) & 0x1; + if (cbit != bit) + { + if (length >= MAX_CLASS_RANGE_SIZE) + return FALSE; + ranges[length] = i; + length++; + bit = cbit; + all = -cbit; + } + i++; + } + } + +if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) + { + if (length >= MAX_CLASS_RANGE_SIZE) + return FALSE; + ranges[length] = 256; + length++; + } + +if (length < 0 || length > 4) + return FALSE; + +bit = bits[0] & 0x1; +if (invert) bit ^= 0x1; + +/* No character is accepted. */ +if (length == 0 && bit == 0) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + +switch(length) + { + case 0: + /* When bit != 0, all characters are accepted. */ + return TRUE; + + case 1: + add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 2: + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 3: + if (bit != 0) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + } + + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1])); + return TRUE; + + case 4: + if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) + && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] + && (ranges[1] & (ranges[2] - ranges[0])) == 0 + && is_powerof2(ranges[2] - ranges[0])) + { + SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + return TRUE; + } + + if (bit != 0) + { + i = 0; + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + i = ranges[0]; + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i)); + return TRUE; + } + + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + return TRUE; + + default: + SLJIT_UNREACHABLE(); + return FALSE; + } +} + +static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1. */ +DEFINE_COMPILER; +uint16_t char_list[MAX_CLASS_CHARS_SIZE]; +uint8_t byte; +sljit_s32 type; +int i, j, k, len, c; + +if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + return FALSE; + +len = 0; + +for (i = 0; i < 32; i++) + { + byte = bits[i]; + + if (nclass) + byte = ~byte; + + j = 0; + while (byte != 0) + { + if (byte & 0x1) + { + c = i * 8 + j; + + k = len; + + if ((c & 0x20) != 0) + { + for (k = 0; k < len; k++) + if (char_list[k] == c - 0x20) + { + char_list[k] |= 0x120; + break; + } + } + + if (k == len) + { + if (len >= MAX_CLASS_CHARS_SIZE) + return FALSE; + + char_list[len++] = (uint16_t) c; + } + } + + byte >>= 1; + j++; + } + } + +if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */ + +i = 0; +j = 0; + +if (char_list[0] == 0) + { + i++; + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + +while (i < len) + { + if ((char_list[i] & 0x100) != 0) + j++; + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]); + CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + } + i++; + } + +if (j != 0) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20); + + for (i = 0; i < len; i++) + if ((char_list[i] & 0x100) != 0) + { + j--; + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); + CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + } + } + +if (invert) + nclass = !nclass; + +type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; +add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0)); +return TRUE; +} + +static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) +{ +/* May destroy TMP1. */ +if (optimize_class_ranges(common, bits, nclass, invert, backtracks)) + return TRUE; +return optimize_class_chars(common, bits, nclass, invert, backtracks); +} + +static void check_anynewline(compiler_common *common) +{ +/* Check whether TMP1 contains a newline character. TMP2 destroyed. */ +DEFINE_COMPILER; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); +OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void check_hspace(compiler_common *common) +{ +/* Check whether TMP1 contains a newline character. TMP2 destroyed. */ +DEFINE_COMPILER; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); +OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void check_vspace(compiler_common *common) +{ +/* Check whether TMP1 contains a newline character. TMP2 destroyed. */ +DEFINE_COMPILER; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); +OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); +OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utf) + { +#endif + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#if PCRE2_CODE_UNIT_WIDTH == 8 + } +#endif +#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ +OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); +} + +static void do_casefulcmp(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; +int char1_reg; +int char2_reg; + +if (HAS_VIRTUAL_REGISTERS) + { + char1_reg = STR_END; + char2_reg = STACK_TOP; + } +else + { + char1_reg = TMP3; + char2_reg = RETURN_ADDR; + } + +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +if (char1_reg == STR_END) + { + OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0); + OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0); + } + +if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + { + label = LABEL(); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + } +else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + label = LABEL(); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +else + { + label = LABEL(); + OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0); + OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + } + +if (char1_reg == STR_END) + { + OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0); + OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0); + } + +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); +} + +static void do_caselesscmp(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; +int char1_reg = STR_END; +int char2_reg; +int lcc_table; +int opt_type = 0; + +if (HAS_VIRTUAL_REGISTERS) + { + char2_reg = STACK_TOP; + lcc_table = STACK_LIMIT; + } +else + { + char2_reg = RETURN_ADDR; + lcc_table = TMP3; + } + +if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + opt_type = 1; +else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) + opt_type = 2; + +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); + +if (char2_reg == STACK_TOP) + { + OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0); + OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0); + } + +OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc); + +if (opt_type == 1) + { + label = LABEL(); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + } +else if (opt_type == 2) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + label = LABEL(); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + } +else + { + label = LABEL(); + OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0); + OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + } + +#if PCRE2_CODE_UNIT_WIDTH != 8 +jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255); +#endif +OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255); +#endif +OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0); +#if PCRE2_CODE_UNIT_WIDTH != 8 +JUMPHERE(jump); +#endif + +if (opt_type == 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); +OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +JUMPTO(SLJIT_NOT_ZERO, label); + +JUMPHERE(jump); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +if (opt_type == 2) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +if (char2_reg == STACK_TOP) + { + OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0); + OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0); + } + +OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); +} + +static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, + compare_context *context, jump_list **backtracks) +{ +DEFINE_COMPILER; +unsigned int othercasebit = 0; +PCRE2_SPTR othercasechar = NULL; +#ifdef SUPPORT_UNICODE +int utflength; +#endif + +if (caseless && char_has_othercase(common, cc)) + { + othercasebit = char_get_othercase_bit(common, cc); + SLJIT_ASSERT(othercasebit); + /* Extracting bit difference info. */ +#if PCRE2_CODE_UNIT_WIDTH == 8 + othercasechar = cc + (othercasebit >> 8); + othercasebit &= 0xff; +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + /* Note that this code only handles characters in the BMP. If there + ever are characters outside the BMP whose othercase differs in only one + bit from itself (there currently are none), this code will need to be + revised for PCRE2_CODE_UNIT_WIDTH == 32. */ + othercasechar = cc + (othercasebit >> 9); + if ((othercasebit & 0x100) != 0) + othercasebit = (othercasebit & 0xff) << 8; + else + othercasebit &= 0xff; +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + } + +if (context->sourcereg == -1) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#elif PCRE2_CODE_UNIT_WIDTH == 32 + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ + context->sourcereg = TMP2; + } + +#ifdef SUPPORT_UNICODE +utflength = 1; +if (common->utf && HAS_EXTRALEN(*cc)) + utflength += GET_EXTRALEN(*cc); + +do + { +#endif + + context->length -= IN_UCHARS(1); +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + + /* Unaligned read is supported. */ + if (othercasebit != 0 && othercasechar == cc) + { + context->c.asuchars[context->ucharptr] = *cc | othercasebit; + context->oc.asuchars[context->ucharptr] = othercasebit; + } + else + { + context->c.asuchars[context->ucharptr] = *cc; + context->oc.asuchars[context->ucharptr] = 0; + } + context->ucharptr++; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +#else + if (context->ucharptr >= 2 || context->length == 0) +#endif + { + if (context->length >= 4) + OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + else if (context->length >= 2) + OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#if PCRE2_CODE_UNIT_WIDTH == 8 + else if (context->length >= 1) + OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + switch(context->ucharptr) + { + case 4 / sizeof(PCRE2_UCHAR): + if (context->oc.asint != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + break; + + case 2 / sizeof(PCRE2_UCHAR): + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + break; + +#if PCRE2_CODE_UNIT_WIDTH == 8 + case 1: + if (context->oc.asbyte != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + break; +#endif + + default: + SLJIT_UNREACHABLE(); + break; + } + context->ucharptr = 0; + } + +#else + + /* Unaligned read is unsupported or in 32 bit mode. */ + if (context->length >= 1) + OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + + context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; + + if (othercasebit != 0 && othercasechar == cc) + { + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + +#endif + + cc++; +#ifdef SUPPORT_UNICODE + utflength--; + } +while (utflength > 0); +#endif + +return cc; +} + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + +#define SET_TYPE_OFFSET(value) \ + if ((value) != typeoffset) \ + { \ + if ((value) < typeoffset) \ + OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ + else \ + OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ + } \ + typeoffset = (value); + +#define SET_CHAR_OFFSET(value) \ + if ((value) != charoffset) \ + { \ + if ((value) < charoffset) \ + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ + else \ + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ + } \ + charoffset = (value); + +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); + +static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +jump_list *found = NULL; +jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; +sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; +struct sljit_jump *jump = NULL; +PCRE2_SPTR ccbegin; +int compares, invertcmp, numberofcmps; +#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +BOOL utf = common->utf; +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + +#ifdef SUPPORT_UNICODE +BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; +BOOL charsaved = FALSE; +int typereg = TMP1; +const sljit_u32 *other_cases; +sljit_uw typeoffset; +#endif /* SUPPORT_UNICODE */ + +/* Scanning the necessary info. */ +cc++; +ccbegin = cc; +compares = 0; + +if (cc[-1] & XCL_MAP) + { + min = 0; + cc += 32 / sizeof(PCRE2_UCHAR); + } + +while (*cc != XCL_END) + { + compares++; + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + if (c > max) max = c; + if (c < min) min = c; +#ifdef SUPPORT_UNICODE + needschar = TRUE; +#endif /* SUPPORT_UNICODE */ + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + if (c < min) min = c; + GETCHARINCTEST(c, cc); + if (c > max) max = c; +#ifdef SUPPORT_UNICODE + needschar = TRUE; +#endif /* SUPPORT_UNICODE */ + } +#ifdef SUPPORT_UNICODE + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_CLIST) + { + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + while (*other_cases != NOTACHAR) + { + if (*other_cases > max) max = *other_cases; + if (*other_cases < min) min = *other_cases; + other_cases++; + } + } + else + { + max = READ_CHAR_MAX; + min = 0; + } + + switch(*cc) + { + case PT_ANY: + /* Any either accepts everything or ignored. */ + if (cc[-1] == XCL_PROP) + { + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } + break; + + case PT_LAMP: + case PT_GC: + case PT_PC: + case PT_ALNUM: + needstype = TRUE; + break; + + case PT_SC: + needsscript = TRUE; + break; + + case PT_SPACE: + case PT_PXSPACE: + case PT_WORD: + case PT_PXGRAPH: + case PT_PXPRINT: + case PT_PXPUNCT: + needstype = TRUE; + needschar = TRUE; + break; + + case PT_CLIST: + case PT_UCNC: + needschar = TRUE; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + cc += 2; + } +#endif /* SUPPORT_UNICODE */ + } +SLJIT_ASSERT(compares > 0); + +/* We are not necessary in utf mode even in 8 bit mode. */ +cc = ccbegin; +if ((cc[-1] & XCL_NOT) != 0) + read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); +else + { +#ifdef SUPPORT_UNICODE + read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0); +#else /* !SUPPORT_UNICODE */ + read_char(common, min, max, NULL, 0); +#endif /* SUPPORT_UNICODE */ + } + +if ((cc[-1] & XCL_HASPROP) == 0) + { + if ((cc[-1] & XCL_MAP) != 0) + { + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) + { + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); + } + + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump); + + cc += 32 / sizeof(PCRE2_UCHAR); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); + add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); + } + } +else if ((cc[-1] & XCL_MAP) != 0) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); +#ifdef SUPPORT_UNICODE + charsaved = TRUE; +#endif /* SUPPORT_UNICODE */ + if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump = NULL; + if (common->utf) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); + +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf) +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + cc += 32 / sizeof(PCRE2_UCHAR); + } + +#ifdef SUPPORT_UNICODE +if (needstype || needsscript) + { + if (needschar && !charsaved) + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (!common->utf) + { + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR); + JUMPHERE(jump); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + + /* Before anything else, we deal with scripts. */ + if (needsscript) + { + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + + ccbegin = cc; + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_SC) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (needschar) + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + if (needstype) + { + if (!needschar) + { + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + } + else + { + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + typereg = RETURN_ADDR; + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* Generating code. */ +charoffset = 0; +numberofcmps = 0; +#ifdef SUPPORT_UNICODE +typeoffset = 0; +#endif /* SUPPORT_UNICODE */ + +while (*cc != XCL_END) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + jump = NULL; + + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + + if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + numberofcmps++; + } + else if (numberofcmps > 0) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + numberofcmps = 0; + } + else + { + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + numberofcmps = 0; + } + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + SET_CHAR_OFFSET(c); + GETCHARINCTEST(c, cc); + + if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + numberofcmps++; + } + else if (numberofcmps > 0) + { + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + numberofcmps = 0; + } + else + { + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + numberofcmps = 0; + } + } +#ifdef SUPPORT_UNICODE + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + if (*cc == XCL_NOTPROP) + invertcmp ^= 0x1; + cc++; + switch(*cc) + { + case PT_ANY: + if (!invertcmp) + jump = JUMP(SLJIT_JUMP); + break; + + case PT_LAMP: + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_GC: + c = PRIV(ucp_typerange)[(int)cc[1] * 2]; + SET_TYPE_OFFSET(c); + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); + break; + + case PT_PC: + jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); + break; + + case PT_SC: + compares++; + /* Do nothing. */ + break; + + case PT_SPACE: + case PT_PXSPACE: + SET_CHAR_OFFSET(9); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + SET_TYPE_OFFSET(ucp_Zl); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_WORD: + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + /* Fall through. */ + + case PT_ALNUM: + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + SET_TYPE_OFFSET(ucp_Nd); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_CLIST: + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + + /* At least three characters are required. + Otherwise this case would be handled by the normal code path. */ + SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); + SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); + + /* Optimizing character pairs, if their difference is power of 2. */ + if (is_powerof2(other_cases[1] ^ other_cases[0])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + } + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + other_cases += 2; + } + else if (is_powerof2(other_cases[2] ^ other_cases[1])) + { + if (charoffset == 0) + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); + else + { + OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); + } + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + + other_cases += 3; + } + else + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + } + + while (*other_cases != NOTACHAR) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); + } + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_UCNC: + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + SET_CHAR_OFFSET(0xa0); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + SET_CHAR_OFFSET(0); + OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXGRAPH: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); + + jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPRINT: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL); + + jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPUNCT: + SET_TYPE_OFFSET(ucp_Sc); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); + OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_TYPE_OFFSET(ucp_Pc); + OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + cc += 2; + } +#endif /* SUPPORT_UNICODE */ + + if (jump != NULL) + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + +if (found != NULL) + set_jumps(found, LABEL()); +} + +#undef SET_TYPE_OFFSET +#undef SET_CHAR_OFFSET + +#endif + +static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) +{ +DEFINE_COMPILER; +int length; +struct sljit_jump *jump[4]; +#ifdef SUPPORT_UNICODE +struct sljit_label *label; +#endif /* SUPPORT_UNICODE */ + +switch(type) + { + case OP_SOD: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + return cc; + + case OP_SOM: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + return cc; + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); +#ifdef SUPPORT_UNICODE + if (common->invalid_utf) + { + add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + return cc; + } +#endif /* SUPPORT_UNICODE */ + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_EODN: + /* Requires rather complex checks. */ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else if (common->nltype == NLTYPE_FIXED) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } + else + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + jump[2] = JUMP(SLJIT_GREATER); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); + /* Equal. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[1]); + if (common->nltype == NLTYPE_ANYCRLF) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + } + JUMPHERE(jump[2]); + JUMPHERE(jump[3]); + } + JUMPHERE(jump[0]); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); + return cc; + + case OP_EOD: + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); + return cc; + + case OP_DOLL: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + + if (!common->endonly) + compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } + return cc; + + case OP_DOLLM: + jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + check_partial(common, FALSE); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char(common, common->nlmax, TMP3, 0, NULL); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_CIRC: + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + } + return cc; + + case OP_CIRCM: + /* TMP2 might be used by peek_char_back. */ + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + if (!common->alt_circumflex) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char_back(common, common->nlmax, backtracks); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_REVERSE: + length = GET(cc, 0); + if (length == 0) + return cc + LINK_SIZE; + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); +#ifdef SUPPORT_UNICODE + if (common->utf) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length); + label = LABEL(); + add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); + move_back(common, backtracks, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else +#endif + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0)); + } + check_start_used_ptr(common); + return cc + LINK_SIZE; + } +SLJIT_UNREACHABLE(); +return cc; +} + +#ifdef SUPPORT_UNICODE + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC(c, cc); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + BACKCHAR(bptr); + GETCHAR(c, bptr); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) break; /* Grapheme break required */ + } + + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. */ + + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); + +return endcc; +} + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR prevcc, endcc, bptr; +BOOL first = TRUE; +uint32_t c; + +prevcc = cc; +endcc = NULL; +do + { + GETCHARINC_INVALID(c, cc, end_subject, break); + rgb = UCD_GRAPHBREAK(c); + + if (first) + { + lgb = rgb; + endcc = cc; + first = FALSE; + continue; + } + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + { + ricount = 0; + bptr = prevcc; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + GETCHARBACK_INVALID(c, bptr, start_subject, break); + + if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. */ + + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + prevcc = endcc; + endcc = cc; + } +while (cc < end_subject); + +return endcc; +} + +static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) +{ +PCRE2_SPTR start_subject = args->begin; +PCRE2_SPTR end_subject = args->end; +int lgb, rgb, ricount; +PCRE2_SPTR bptr; +uint32_t c; + +/* Patch by PH */ +/* GETCHARINC(c, cc); */ +c = *cc++; + +#if PCRE2_CODE_UNIT_WIDTH == 32 +if (c >= 0x110000) + return NULL; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ +lgb = UCD_GRAPHBREAK(c); + +while (cc < end_subject) + { + c = *cc; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + rgb = UCD_GRAPHBREAK(c); + + if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) + break; + + /* Not breaking between Regional Indicators is allowed only if there + are an even number of preceding RIs. */ + + if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + { + ricount = 0; + bptr = cc - 1; + + /* bptr is pointing to the left-hand character */ + while (bptr > start_subject) + { + bptr--; + c = *bptr; +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c >= 0x110000) + break; +#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ + + if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + + ricount++; + } + + if ((ricount & 1) != 0) + break; /* Grapheme break required */ + } + + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. */ + + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) + lgb = rgb; + + cc++; + } + +return cc; +} + +#endif /* SUPPORT_UNICODE */ + +static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr) +{ +DEFINE_COMPILER; +int length; +unsigned int c, oc, bit; +compare_context context; +struct sljit_jump *jump[3]; +jump_list *end_list; +#ifdef SUPPORT_UNICODE +PCRE2_UCHAR propdata[5]; +#endif /* SUPPORT_UNICODE */ + +switch(type) + { + case OP_NOT_DIGIT: + case OP_DIGIT: + /* Digits are usually 0-9, so it is worth to optimize them. */ + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_DIGIT); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_DIGIT); + /* Flip the starting bit in the negative case. */ + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE)) + read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR); + else +#endif + read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); + return cc; + + case OP_ANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[0]); + } + else + check_newlinechar(common, common->nltype, backtracks, TRUE); + return cc; + + case OP_ALLANY: + if (check_str_ptr) + detect_partial_match(common, backtracks); +#ifdef SUPPORT_UNICODE + if (common->utf) + { + if (common->invalid_utf) + { + read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); + return cc; + } + +#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump[0]); + return cc; +#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */ + } +#endif /* SUPPORT_UNICODE */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return cc; + + case OP_ANYBYTE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_NOTPROP: + case OP_PROP: + propdata[0] = XCL_HASPROP; + propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; + propdata[2] = cc[0]; + propdata[3] = cc[1]; + propdata[4] = XCL_END; + if (check_str_ptr) + detect_partial_match(common, backtracks); + compile_xclass_matchingpath(common, propdata, backtracks); + return cc + 2; +#endif + + case OP_ANYNL: + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0); + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + /* We don't need to handle soft partial matching case. */ + end_list = NULL; + if (common->mode != PCRE2_JIT_PARTIAL_HARD) + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + else + check_str_end(common, &end_list); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump[2] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[0]); + check_newlinechar(common, common->bsr_nltype, backtracks, FALSE); + set_jumps(end_list, LABEL()); + JUMPHERE(jump[1]); + JUMPHERE(jump[2]); + return cc; + + case OP_NOT_HSPACE: + case OP_HSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_HSPACE) + read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0x9, 0x3000, NULL, 0); + + add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_NOT_VSPACE: + case OP_VSPACE: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + if (type == OP_NOT_VSPACE) + read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0xa, 0x2029, NULL, 0); + + add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); + sljit_set_current_flags(compiler, SLJIT_SET_Z); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + +#ifdef SUPPORT_UNICODE + case OP_EXTUNI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#else + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + if (!common->utf || common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +#endif + + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); + + if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0); + /* Since we successfully read a char above, partial matching must occure. */ + check_partial(common, TRUE); + JUMPHERE(jump[0]); + } + return cc; +#endif + + case OP_CHAR: + case OP_CHARI: + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); +#endif + + if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE) + detect_partial_match(common, backtracks); + + if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + + context.length = IN_UCHARS(length); + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); + } + +#ifdef SUPPORT_UNICODE + if (common->utf) + { + GETCHAR(c, cc); + } + else +#endif + c = *cc; + + SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc)); + + if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0); + + SLJIT_ASSERT(!is_powerof2(c ^ oc)); + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); + CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + JUMPHERE(jump[0]); + } + return cc + length; + + case OP_NOT: + case OP_NOTI: + if (check_str_ptr) + detect_partial_match(common, backtracks); + + length = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 + c = *cc; + if (c < 128 && !common->invalid_utf) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + if (type == OP_NOT || !char_has_othercase(common, cc)) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + else + { + /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ + OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + } + /* Skip the variable-length character. */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(jump[0]); + return cc + 1; + } + else +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + { + GETCHARLEN(c, cc, length); + } + } + else +#endif /* SUPPORT_UNICODE */ + c = *cc; + + if (type == OP_NOT || !char_has_othercase(common, cc)) + { + read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } + else + { + oc = char_othercase(common, c); + read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR); + bit = c ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + } + else + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + } + } + return cc + length; + + case OP_CLASS: + case OP_NCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; + if (type == OP_NCLASS) + read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, bit, NULL, 0); +#else + if (type == OP_NCLASS) + read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR); + else + read_char(common, 0, 255, NULL, 0); +#endif + + if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 + jump[0] = NULL; + if (common->utf) + { + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } + } +#elif PCRE2_CODE_UNIT_WIDTH != 8 + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */ + + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + if (jump[0] != NULL) + JUMPHERE(jump[0]); +#endif + return cc + 32 / sizeof(PCRE2_UCHAR); + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + case OP_XCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); + compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); + return cc + GET(cc, 0) - 1; +#endif + } +SLJIT_UNREACHABLE(); +return cc; +} + +static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks) +{ +/* This function consumes at least one input character. */ +/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ +DEFINE_COMPILER; +PCRE2_SPTR ccbegin = cc; +compare_context context; +int size; + +context.length = 0; +do + { + if (cc >= ccend) + break; + + if (*cc == OP_CHAR) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); +#endif + } + else if (*cc == OP_CHARI) + { + size = 1; +#ifdef SUPPORT_UNICODE + if (common->utf) + { + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + else if (HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); + } + else +#endif + if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) + size = 0; + } + else + size = 0; + + cc += 1 + size; + context.length += IN_UCHARS(size); + } +while (size > 0 && context.length <= 128); + +cc = ccbegin; +if (context.length > 0) + { + /* We have a fixed-length byte sequence. */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); + + context.sourcereg = -1; +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + context.ucharptr = 0; +#endif + do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0); + return cc; + } + +/* A non-fixed length character will be checked if length == 0. */ +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); +} + +/* Forward definitions. */ +static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *); +static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); + +#define PUSH_BACKTRACK(size, ccstart, error) \ + do \ + { \ + backtrack = sljit_alloc_memory(compiler, (size)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return error; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ + } \ + while (0) + +#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ + do \ + { \ + backtrack = sljit_alloc_memory(compiler, (size)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return; \ + memset(backtrack, 0, size); \ + backtrack->prev = parent->top; \ + backtrack->cc = (ccstart); \ + parent->top = backtrack; \ + } \ + while (0) + +#define BACKTRACK_AS(type) ((type *)backtrack) + +static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) +{ +/* The OVECTOR offset goes to TMP2. */ +DEFINE_COMPILER; +int count = GET2(cc, 1 + IMM2_SIZE); +PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size; +unsigned int offset; +jump_list *found = NULL; + +SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + +count--; +while (count-- > 0) + { + offset = GET2(slot, 0) << 1; + GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); + add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + slot += common->name_entry_size; + } + +offset = GET2(slot, 0) << 1; +GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); +if (backtracks != NULL && !common->unset_backref) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + +set_jumps(found, LABEL()); +} + +static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +int offset = 0; +struct sljit_jump *jump = NULL; +struct sljit_jump *partial; +struct sljit_jump *nopartial; +#if defined SUPPORT_UNICODE +struct sljit_label *loop; +struct sljit_label *caseless_loop; +jump_list *no_match = NULL; +int source_reg = COUNT_MATCH; +int source_end_reg = ARGUMENTS; +int char1_reg = STACK_LIMIT; +#endif /* SUPPORT_UNICODE */ + +if (ref) + { + offset = GET2(cc, 1) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + /* OVECTOR(1) contains the "string begin - 1" constant. */ + if (withchecks && !common->unset_backref) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + } +else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + +#if defined SUPPORT_UNICODE +if (common->utf && *cc == OP_REFI) + { + SLJIT_ASSERT(common->iref_ptr != 0); + + if (ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + + if (withchecks && emptyfail) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0)); + + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0); + + OP1(SLJIT_MOV, source_reg, 0, TMP1, 0); + OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0); + + loop = LABEL(); + jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0); + partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + + /* Read original character. It must be a valid UTF character. */ + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0); + + read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF); + + OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0); + + /* Read second character. */ + read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR); + + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + + OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); + + add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); + + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records)); + + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case)); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); + CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); + + add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets)); + + caseless_loop = LABEL(); + OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); + OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0); + JUMPTO(SLJIT_EQUAL, loop); + JUMPTO(SLJIT_LESS, caseless_loop); + + set_jumps(no_match, LABEL()); + if (common->mode == PCRE2_JIT_COMPLETE) + JUMPHERE(partial); + + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + } + + JUMPHERE(jump); + OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr); + OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2); + return; + } +else +#endif /* SUPPORT_UNICODE */ + { + if (ref) + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); + else + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); + + if (withchecks) + jump = JUMP(SLJIT_ZERO); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); + if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, backtracks, partial); + + add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + nopartial = JUMP(SLJIT_JUMP); + JUMPHERE(partial); + /* TMP2 -= STR_END - STR_PTR */ + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); + partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + JUMPHERE(partial); + check_partial(common, FALSE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(nopartial); + } + } + +if (jump != NULL) + { + if (emptyfail) + add_jump(compiler, backtracks, jump); + else + JUMPHERE(jump); + } +} + +static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +backtrack_common *backtrack; +PCRE2_UCHAR type; +int offset = 0; +struct sljit_label *label; +struct sljit_jump *zerolength; +struct sljit_jump *jump = NULL; +PCRE2_SPTR ccbegin = cc; +int min = 0, max = 0; +BOOL minimize; + +PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL); + +if (ref) + offset = GET2(cc, 1) << 1; +else + cc += IMM2_SIZE; +type = cc[1 + IMM2_SIZE]; + +SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); +minimize = (type & 0x1) != 0; +switch(type) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + min = 0; + max = 0; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRPLUS: + case OP_CRMINPLUS: + min = 1; + max = 0; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRQUERY: + case OP_CRMINQUERY: + min = 0; + max = 1; + cc += 1 + IMM2_SIZE + 1; + break; + case OP_CRRANGE: + case OP_CRMINRANGE: + min = GET2(cc, 1 + IMM2_SIZE + 1); + max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); + cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; + break; + default: + SLJIT_UNREACHABLE(); + break; + } + +if (!minimize) + { + if (min == 0) + { + allocate_stack(common, 2); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + /* Temporary release of STR_PTR. */ + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Restore if not zero length. */ + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + } + else + { + allocate_stack(common, 1); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } + + if (min > 1 || max > 1) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); + + label = LABEL(); + if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); + + if (min > 1 || max > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + if (min > 1) + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); + if (max > 1) + { + jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + JUMPHERE(jump); + } + } + + if (max == 0) + { + /* Includes min > 1 case as well. */ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + } + + JUMPHERE(zerolength); + BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); + + count_match(common); + return cc; + } + +allocate_stack(common, ref ? 2 : 3); +if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); +if (type != OP_CRMINSTAR) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + +if (min == 0) + { + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Length is non-zero, we can match real repeats. */ + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + jump = JUMP(SLJIT_JUMP); + } +else + { + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } + +BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); +if (max > 0) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + +if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + +if (min > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath); + } +else if (max > 0) + OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); + +if (jump != NULL) + JUMPHERE(jump); +JUMPHERE(zerolength); + +count_match(common); +return cc; +} + +static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +recurse_entry *entry = common->entries; +recurse_entry *prev = NULL; +sljit_sw start = GET(cc, 1); +PCRE2_SPTR start_cc; +BOOL needs_control_head; + +PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); + +/* Inlining simple patterns. */ +if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) + { + start_cc = common->start + start; + compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); + BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; + return cc + 1 + LINK_SIZE; + } + +while (entry != NULL) + { + if (entry->start == start) + break; + prev = entry; + entry = entry->next; + } + +if (entry == NULL) + { + entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + entry->next = NULL; + entry->entry_label = NULL; + entry->backtrack_label = NULL; + entry->entry_calls = NULL; + entry->backtrack_calls = NULL; + entry->start = start; + + if (prev != NULL) + prev->next = entry; + else + common->entries = entry; + } + +BACKTRACK_AS(recurse_backtrack)->entry = entry; + +if (entry->entry_label == NULL) + add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL)); +else + JUMPTO(SLJIT_FAST_CALL, entry->entry_label); +/* Leave if the match is failed. */ +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); +return cc + 1 + LINK_SIZE; +} + +static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +{ +PCRE2_SPTR begin; +PCRE2_SIZE *ovector; +sljit_u32 oveccount, capture_top; + +if (arguments->callout == NULL) + return 0; + +SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size); + +begin = arguments->begin; +ovector = (PCRE2_SIZE*)(callout_block + 1); +oveccount = callout_block->capture_top; + +SLJIT_ASSERT(oveccount >= 1); + +callout_block->version = 2; +callout_block->callout_flags = 0; + +/* Offsets in subject. */ +callout_block->subject_length = arguments->end - arguments->begin; +callout_block->start_match = jit_ovector[0] - begin; +callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin; +callout_block->subject = begin; + +/* Convert and copy the JIT offset vector to the ovector array. */ +callout_block->capture_top = 1; +callout_block->offset_vector = ovector; + +ovector[0] = PCRE2_UNSET; +ovector[1] = PCRE2_UNSET; +ovector += 2; +jit_ovector += 2; +capture_top = 1; + +/* Convert pointers to sizes. */ +while (--oveccount != 0) + { + capture_top++; + + ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin); + ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin); + + if (ovector[0] != PCRE2_UNSET) + callout_block->capture_top = capture_top; + + ovector += 2; + jit_ovector += 2; + } + +return (arguments->callout)(callout_block, arguments->callout_data); +} + +#define CALLOUT_ARG_OFFSET(arg) \ + SLJIT_OFFSETOF(pcre2_callout_block, arg) + +static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +sljit_s32 mov_opcode; +unsigned int callout_length = (*cc == OP_CALLOUT) + ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE); +sljit_sw value1; +sljit_sw value2; +sljit_sw value3; +sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw); + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw); + +allocate_stack(common, callout_arg_size); + +SLJIT_ASSERT(common->capture_last_ptr != 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0; +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); +OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1); + +/* These pointer sized fields temporarly stores internal variables. */ +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); + +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); +mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1)); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE)); + +if (*cc == OP_CALLOUT) + { + value1 = 0; + value2 = 0; + value3 = 0; + } +else + { + value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1); + value2 = (callout_length - (1 + 4*LINK_SIZE + 2)); + value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE)); + } + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2); +OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +/* Needed to save important temporary registers. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); +/* SLJIT_R0 = arguments */ +OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); +GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +free_stack(common, callout_arg_size); + +/* Check return value. */ +OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); +if (common->abort_label == NULL) + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); +else + JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label); +return cc + callout_length; +} + +#undef CALLOUT_ARG_SIZE +#undef CALLOUT_ARG_OFFSET + +static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc) +{ +while (TRUE) + { + switch (*cc) + { + case OP_CALLOUT_STR: + cc += GET(cc, 1 + 2*LINK_SIZE); + break; + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CALLOUT: + case OP_ALT: + cc += PRIV(OP_lengths)[*cc]; + break; + + case OP_KET: + return FALSE; + + default: + return TRUE; + } + } +} + +static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional) +{ +DEFINE_COMPILER; +int framesize; +int extrasize; +BOOL local_quit_available = FALSE; +BOOL needs_control_head; +int private_data_ptr; +backtrack_common altbacktrack; +PCRE2_SPTR ccbegin; +PCRE2_UCHAR opcode; +PCRE2_UCHAR bra = OP_BRA; +jump_list *tmp = NULL; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; +jump_list **found; +/* Saving previous accept variables. */ +BOOL save_local_quit_available = common->local_quit_available; +BOOL save_in_positive_assertion = common->in_positive_assertion; +then_trap_backtrack *save_then_trap = common->then_trap; +struct sljit_label *save_quit_label = common->quit_label; +struct sljit_label *save_accept_label = common->accept_label; +jump_list *save_quit = common->quit; +jump_list *save_positive_assertion_quit = common->positive_assertion_quit; +jump_list *save_accept = common->accept; +struct sljit_jump *jump; +struct sljit_jump *brajump = NULL; + +/* Assert captures then. */ +common->then_trap = NULL; + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + SLJIT_ASSERT(!conditional); + bra = *cc; + cc++; + } +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +backtrack->framesize = framesize; +backtrack->private_data_ptr = private_data_ptr; +opcode = *cc; +SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); +found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; +ccbegin = cc; +cc += GET(cc, 1); + +if (bra == OP_BRAMINZERO) + { + /* This is a braminzero backtrack path. */ + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + +if (framesize < 0) + { + extrasize = 1; + if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE)) + extrasize = 0; + + if (needs_control_head) + extrasize++; + + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + + if (extrasize > 0) + allocate_stack(common, extrasize); + + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + + if (extrasize > 0) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + + if (needs_control_head) + { + SLJIT_ASSERT(extrasize == 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + } + } +else + { + extrasize = needs_control_head ? 3 : 2; + allocate_stack(common, framesize + extrasize); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + } + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + + init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize); + } + +memset(&altbacktrack, 0, sizeof(backtrack_common)); +if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) + { + /* Control verbs cannot escape from these asserts. */ + local_quit_available = TRUE; + common->local_quit_available = TRUE; + common->quit_label = NULL; + common->quit = NULL; + } + +common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK); +common->positive_assertion_quit = NULL; + +while (1) + { + common->accept_label = NULL; + common->accept = NULL; + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; + + if (*ccbegin == OP_ALT && extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + altbacktrack.cc = ccbegin; + compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->in_positive_assertion = save_in_positive_assertion; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assertion_quit = save_positive_assertion_quit; + common->accept = save_accept; + return NULL; + } + common->accept_label = LABEL(); + if (common->accept != NULL) + set_jumps(common->accept, common->accept_label); + + /* Reset stack. */ + if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else if (extrasize > 0) + free_stack(common, extrasize); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + else + { + if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + else + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2)); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + } + } + + if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) + { + /* We know that STR_PTR was stored on the top of the stack. */ + if (conditional) + { + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1)); + } + else if (bra == OP_BRAZERO) + { + if (framesize < 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize)); + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (framesize >= 0) + { + /* For OP_BRA and OP_BRAMINZERO. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1)); + } + } + add_jump(compiler, found, JUMP(SLJIT_JUMP)); + + compile_backtrackingpath(common, altbacktrack.top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } + common->in_positive_assertion = save_in_positive_assertion; + common->then_trap = save_then_trap; + common->accept_label = save_accept_label; + common->positive_assertion_quit = save_positive_assertion_quit; + common->accept = save_accept; + return NULL; + } + set_jumps(altbacktrack.topbacktracks, LABEL()); + + if (*cc != OP_ALT) + break; + + ccbegin = cc; + cc += GET(cc, 1); + } + +if (local_quit_available) + { + SLJIT_ASSERT(common->positive_assertion_quit == NULL); + /* Makes the check less complicated below. */ + common->positive_assertion_quit = common->quit; + } + +/* None of them matched. */ +if (common->positive_assertion_quit != NULL) + { + jump = JUMP(SLJIT_JUMP); + set_jumps(common->positive_assertion_quit, LABEL()); + SLJIT_ASSERT(framesize != no_stack); + if (framesize < 0) + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); + else + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw)); + } + JUMPHERE(jump); + } + +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); + +if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) + { + /* Assert is failed. */ + if ((conditional && extrasize > 0) || bra == OP_BRAZERO) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + if (framesize < 0) + { + /* The topmost item should be 0. */ + if (bra == OP_BRAZERO) + { + if (extrasize == 2) + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (extrasize > 0) + free_stack(common, extrasize); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); + /* The topmost item should be 0. */ + if (bra == OP_BRAZERO) + { + free_stack(common, framesize + extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + jump = JUMP(SLJIT_JUMP); + if (bra != OP_BRAZERO) + add_jump(compiler, target, jump); + + /* Assert is successful. */ + set_jumps(tmp, LABEL()); + if (framesize < 0) + { + /* We know that STR_PTR was stored on the top of the stack. */ + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize)); + + /* Keep the STR_PTR on the top of the stack. */ + if (bra == OP_BRAZERO) + { + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + if (extrasize == 2) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + else if (bra == OP_BRAMINZERO) + { + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + } + else + { + if (bra == OP_BRA) + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1)); + } + else + { + /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); + if (extrasize == 2) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra == OP_BRAMINZERO) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + { + SLJIT_ASSERT(extrasize == 3); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); + } + } + } + + if (bra == OP_BRAZERO) + { + backtrack->matchingpath = LABEL(); + SET_LABEL(jump, backtrack->matchingpath); + } + else if (bra == OP_BRAMINZERO) + { + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); + JUMPHERE(brajump); + if (framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + set_jumps(backtrack->common.topbacktracks, LABEL()); + } + } +else + { + /* AssertNot is successful. */ + if (framesize < 0) + { + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + if (bra != OP_BRA) + { + if (extrasize == 2) + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else if (extrasize > 0) + free_stack(common, extrasize); + } + else + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); + /* The topmost item should be 0. */ + if (bra != OP_BRA) + { + free_stack(common, framesize + extrasize - 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + else + free_stack(common, framesize + extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + + if (bra == OP_BRAZERO) + backtrack->matchingpath = LABEL(); + else if (bra == OP_BRAMINZERO) + { + JUMPTO(SLJIT_JUMP, backtrack->matchingpath); + JUMPHERE(brajump); + } + + if (bra != OP_BRA) + { + SLJIT_ASSERT(found == &backtrack->common.topbacktracks); + set_jumps(backtrack->common.topbacktracks, LABEL()); + backtrack->common.topbacktracks = NULL; + } + } + +if (local_quit_available) + { + common->local_quit_available = save_local_quit_available; + common->quit_label = save_quit_label; + common->quit = save_quit; + } +common->in_positive_assertion = save_in_positive_assertion; +common->then_trap = save_then_trap; +common->accept_label = save_accept_label; +common->positive_assertion_quit = save_positive_assertion_quit; +common->accept = save_accept; +return cc + 1 + LINK_SIZE; +} + +static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) +{ +DEFINE_COMPILER; +int stacksize; + +if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else + { + stacksize = needs_control_head ? 1 : 0; + if (ket != OP_KET || has_alternatives) + stacksize++; + + if (stacksize > 0) + free_stack(common, stacksize); + } + + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1)); + + /* TMP2 which is set here used by OP_KETRMAX below. */ + if (ket == OP_KETRMAX) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + else if (ket == OP_KETRMIN) + { + /* Move the STR_PTR to the private_data_ptr. */ + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); + } + } +else + { + stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); + if (needs_control_head) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); + + if (ket == OP_KETRMAX) + { + /* TMP2 which is set here used by OP_KETRMAX below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + } +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); +} + +static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) +{ +DEFINE_COMPILER; + +if (common->capture_last_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + stacksize++; + } +if (common->optimized_cbracket[offset >> 1] == 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + stacksize += 2; + } +return stacksize; +} + +static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr) +{ + if (PRIV(script_run)(ptr, endptr, FALSE)) + return endptr; + return NULL; +} + +#ifdef SUPPORT_UNICODE + +static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr) +{ + if (PRIV(script_run)(ptr, endptr, TRUE)) + return endptr; + return NULL; +} + +#endif /* SUPPORT_UNICODE */ + +static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) +{ +DEFINE_COMPILER; + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); +#ifdef SUPPORT_UNICODE +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, + common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run)); +#else +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run)); +#endif + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); +add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +} + +/* + Handling bracketed expressions is probably the most complex part. + + Stack layout naming characters: + S - Push the current STR_PTR + 0 - Push a 0 (NULL) + A - Push the current STR_PTR. Needed for restoring the STR_PTR + before the next alternative. Not pushed if there are no alternatives. + M - Any values pushed by the current alternative. Can be empty, or anything. + C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. + L - Push the previous local (pointed by localptr) to the stack + () - opional values stored on the stack + ()* - optonal, can be stored multiple times + + The following list shows the regular expression templates, their PCRE byte codes + and stack layout supported by pcre-sljit. + + (?:) OP_BRA | OP_KET A M + () OP_CBRA | OP_KET C M + (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* + OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* + (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* + OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* + ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* + OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* + ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* + OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* + (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) + (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) + ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) + ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) + (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* + OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* + (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* + OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* + ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* + OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* + ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* + OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* + + + Stack layout naming characters: + A - Push the alternative index (starting from 0) on the stack. + Not pushed if there is no alternatives. + M - Any values pushed by the current alternative. Can be empty, or anything. + + The next list shows the possible content of a bracket: + (|) OP_*BRA | OP_ALT ... M A + (?()|) OP_*COND | OP_ALT M A + (?>|) OP_ONCE | OP_ALT ... [stack trace] M A + Or nothing, if trace is unnecessary +*/ + +static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; +int private_data_ptr = 0; +int offset = 0; +int i, stacksize; +int repeat_ptr = 0, repeat_length = 0; +int repeat_type = 0, repeat_count = 0; +PCRE2_SPTR ccbegin; +PCRE2_SPTR matchingpath; +PCRE2_SPTR slot; +PCRE2_UCHAR bra = OP_BRA; +PCRE2_UCHAR ket; +assert_backtrack *assert; +BOOL has_alternatives; +BOOL needs_control_head = FALSE; +struct sljit_jump *jump; +struct sljit_jump *skip; +struct sljit_label *rmax_label = NULL; +struct sljit_jump *braminzero = NULL; + +PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + bra = *cc; + cc++; + opcode = *cc; + } + +opcode = *cc; +ccbegin = cc; +matchingpath = bracketend(cc) - 1 - LINK_SIZE; +ket = *matchingpath; +if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) + { + repeat_ptr = PRIVATE_DATA(matchingpath); + repeat_length = PRIVATE_DATA(matchingpath + 1); + repeat_type = PRIVATE_DATA(matchingpath + 2); + repeat_count = PRIVATE_DATA(matchingpath + 3); + SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } + +matchingpath = ccbegin + 1 + LINK_SIZE; +SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); +SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); +cc += GET(cc, 1); + +has_alternatives = *cc == OP_ALT; +if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) + { + SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3, + compile_time_checks_must_be_grouped_together); + has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE; + } + +if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) + opcode = OP_SCOND; + +if (opcode == OP_CBRA || opcode == OP_SCBRA) + { + /* Capturing brackets has a pre-allocated space. */ + offset = GET2(ccbegin, 1 + LINK_SIZE); + if (common->optimized_cbracket[offset] == 0) + { + private_data_ptr = OVECTOR_PRIV(offset); + offset <<= 1; + } + else + { + offset <<= 1; + private_data_ptr = OVECTOR(offset); + } + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; + matchingpath += IMM2_SIZE; + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + /* Other brackets simply allocate the next entry. */ + private_data_ptr = PRIVATE_DATA(ccbegin); + SLJIT_ASSERT(private_data_ptr != 0); + BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; + if (opcode == OP_ONCE) + BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head); + } + +/* Instructions before the first alternative. */ +stacksize = 0; +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) + stacksize++; +if (bra == OP_BRAZERO) + stacksize++; + +if (stacksize > 0) + allocate_stack(common, stacksize); + +stacksize = 0; +if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + +if (bra == OP_BRAZERO) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + +if (bra == OP_BRAMINZERO) + { + /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */ + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (ket != OP_KETRMIN) + { + free_stack(common, 1); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + else if (opcode == OP_ONCE || opcode >= OP_SBRA) + { + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + /* Nothing stored during the first run. */ + skip = JUMP(SLJIT_JUMP); + JUMPHERE(jump); + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) + { + /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + } + else + { + /* Except when the whole stack frame must be saved. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2)); + } + JUMPHERE(skip); + } + else + { + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + JUMPHERE(jump); + } + } + +if (repeat_type != 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count); + if (repeat_type == OP_EXACT) + rmax_label = LABEL(); + } + +if (ket == OP_KETRMIN) + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); + +if (ket == OP_KETRMAX) + { + rmax_label = LABEL(); + if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label; + } + +/* Handling capturing brackets and alternatives. */ +if (opcode == OP_ONCE) + { + stacksize = 0; + if (needs_control_head) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + stacksize++; + } + + if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) + { + /* Neither capturing brackets nor recursions are found in the block. */ + if (ket == OP_KETRMIN) + { + stacksize += 2; + if (!needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + } + else + { + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + if (ket == OP_KETRMAX || has_alternatives) + stacksize++; + } + + if (stacksize > 0) + allocate_stack(common, stacksize); + + stacksize = 0; + if (needs_control_head) + { + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + + if (ket == OP_KETRMIN) + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); + } + else if (ket == OP_KETRMAX || has_alternatives) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + } + else + { + if (ket != OP_KET || has_alternatives) + stacksize++; + + stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1; + allocate_stack(common, stacksize); + + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + + stacksize = needs_control_head ? 1 : 0; + if (ket != OP_KET || has_alternatives) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + stacksize++; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + } + else + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); + } + init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1); + } + } +else if (opcode == OP_CBRA || opcode == OP_SCBRA) + { + /* Saving the previous values. */ + if (common->optimized_cbracket[offset >> 1] != 0) + { + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); + allocate_stack(common, 2); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + /* Saving the previous value. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } +else if (has_alternatives) + { + /* Pushing the starting string pointer. */ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + +/* Generating code for the first alternative. */ +if (opcode == OP_COND || opcode == OP_SCOND) + { + if (*matchingpath == OP_CREF) + { + SLJIT_ASSERT(has_alternatives); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), + CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + matchingpath += 1 + IMM2_SIZE; + } + else if (*matchingpath == OP_DNCREF) + { + SLJIT_ASSERT(has_alternatives); + + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + slot += common->name_entry_size; + i--; + while (i-- > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0); + slot += common->name_entry_size; + } + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); + matchingpath += 1 + 2 * IMM2_SIZE; + } + else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) + { + /* Never has other case. */ + BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + SLJIT_ASSERT(!has_alternatives); + + if (*matchingpath == OP_TRUE) + { + stacksize = 1; + matchingpath++; + } + else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL) + stacksize = 0; + else if (*matchingpath == OP_RREF) + { + stacksize = GET2(matchingpath, 1); + if (common->currententry == NULL) + stacksize = 0; + else if (stacksize == RREF_ANY) + stacksize = 1; + else if (common->currententry->start == 0) + stacksize = stacksize == 0; + else + stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + + if (stacksize != 0) + matchingpath += 1 + IMM2_SIZE; + } + else + { + if (common->currententry == NULL || common->currententry->start == 0) + stacksize = 0; + else + { + stacksize = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + while (stacksize > 0) + { + if ((int)GET2(slot, 0) == i) + break; + slot += common->name_entry_size; + stacksize--; + } + } + + if (stacksize != 0) + matchingpath += 1 + 2 * IMM2_SIZE; + } + + /* The stacksize == 0 is a common "else" case. */ + if (stacksize == 0) + { + if (*cc == OP_ALT) + { + matchingpath = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + } + else + matchingpath = cc; + } + } + else + { + SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); + /* Similar code as PUSH_BACKTRACK macro. */ + assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + memset(assert, 0, sizeof(assert_backtrack)); + assert->common.cc = matchingpath; + BACKTRACK_AS(bracket_backtrack)->u.assert = assert; + matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); + } + } + +compile_matchingpath(common, matchingpath, cc, backtrack); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + +if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + +if (opcode == OP_ONCE) + match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + +if (opcode == OP_SCRIPT_RUN) + match_script_run_common(common, private_data_ptr, backtrack); + +stacksize = 0; +if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + stacksize++; + } +if (ket != OP_KET || bra != OP_BRA) + stacksize++; +if (offset != 0) + { + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } +if (has_alternatives && opcode != OP_ONCE) + stacksize++; + +if (stacksize > 0) + allocate_stack(common, stacksize); + +stacksize = 0; +if (repeat_type == OP_MINUPTO) + { + /* TMP2 was set above. */ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); + stacksize++; + } + +if (ket != OP_KET || bra != OP_BRA) + { + if (ket != OP_KET) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + +if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + +/* Skip and count the other alternatives. */ +i = 1; +while (*cc == OP_ALT) + { + cc += GET(cc, 1); + i++; + } + +if (has_alternatives) + { + if (opcode != OP_ONCE) + { + if (i <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + else + BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } + if (ket != OP_KETRMAX) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + } + +/* Must be after the matchingpath label. */ +if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) + { + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + } + +if (ket == OP_KETRMAX) + { + if (repeat_type != 0) + { + if (has_alternatives) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode < OP_BRA || opcode >= OP_SBRA) + { + if (has_alternatives) + BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); + + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE) + { + /* This case includes opcodes such as OP_SCRIPT_RUN. */ + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label); + /* Drop STR_PTR for greedy plus quantifier. */ + if (bra != OP_BRAZERO) + free_stack(common, 1); + } + else + /* TMP2 must contain the starting STR_PTR. */ + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); + } + else + JUMPTO(SLJIT_JUMP, rmax_label); + BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); + } + +if (repeat_type == OP_EXACT) + { + count_match(common); + OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); + } +else if (repeat_type == OP_UPTO) + { + /* We need to preserve the counter. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + } + +if (bra == OP_BRAZERO) + BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); + +if (bra == OP_BRAMINZERO) + { + /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */ + JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); + if (braminzero != NULL) + { + JUMPHERE(braminzero); + /* We need to release the end pointer to perform the + backtrack for the zero-length iteration. When + framesize is < 0, OP_ONCE will do the release itself. */ + if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + } + else if (ket == OP_KETRMIN && opcode != OP_ONCE) + free_stack(common, 1); + } + /* Continue to the normal backtrack. */ + } + +if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) + count_match(common); + +cc += 1 + LINK_SIZE; + +if (opcode == OP_ONCE) + { + /* We temporarily encode the needs_control_head in the lowest bit. + Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns + the same value for small signed numbers (including negative numbers). */ + BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); + } +return cc + repeat_length; +} + +static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; +int private_data_ptr; +int cbraprivptr = 0; +BOOL needs_control_head; +int framesize; +int stacksize; +int offset = 0; +BOOL zero = FALSE; +PCRE2_SPTR ccbegin = NULL; +int stack; /* Also contains the offset of control head. */ +struct sljit_label *loop = NULL; +struct jump_list *emptymatch = NULL; + +PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); +if (*cc == OP_BRAPOSZERO) + { + zero = TRUE; + cc++; + } + +opcode = *cc; +private_data_ptr = PRIVATE_DATA(cc); +SLJIT_ASSERT(private_data_ptr != 0); +BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; +switch(opcode) + { + case OP_BRAPOS: + case OP_SBRAPOS: + ccbegin = cc + 1 + LINK_SIZE; + break; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + offset = GET2(cc, 1 + LINK_SIZE); + /* This case cannot be optimized in the same was as + normal capturing brackets. */ + SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); + cbraprivptr = OVECTOR_PRIV(offset); + offset <<= 1; + ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); +BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; +if (framesize < 0) + { + if (offset != 0) + { + stacksize = 2; + if (common->capture_last_ptr != 0) + stacksize++; + } + else + stacksize = 1; + + if (needs_control_head) + stacksize++; + if (!zero) + stacksize++; + + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; + allocate_stack(common, stacksize); + if (framesize == no_frame) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + + stack = 0; + if (offset != 0) + { + stack = 2; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + if (common->capture_last_ptr != 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); + stack = 3; + } + } + else + { + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + stack = 1; + } + + if (needs_control_head) + stack++; + if (!zero) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); + if (needs_control_head) + { + stack--; + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); + } + } +else + { + stacksize = framesize + 1; + if (!zero) + stacksize++; + if (needs_control_head) + stacksize++; + if (offset == 0) + stacksize++; + BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; + + allocate_stack(common, stacksize); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + if (needs_control_head) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + + stack = 0; + if (!zero) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); + stack = 1; + } + if (needs_control_head) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); + stack++; + } + if (offset == 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); + stack++; + } + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); + init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize); + stack -= 1 + (offset == 0); + } + +if (offset != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + +loop = LABEL(); +while (*cc != OP_KETRPOS) + { + backtrack->top = NULL; + backtrack->topbacktracks = NULL; + cc += GET(cc, 1); + + compile_matchingpath(common, ccbegin, cc, backtrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + + if (framesize < 0) + { + if (framesize == no_frame) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (offset != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + } + else + { + if (opcode == OP_SBRAPOS) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + + /* Even if the match is empty, we need to reset the control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); + + if (!zero) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); + } + else + { + if (offset != 0) + { + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + if (opcode == OP_SBRAPOS) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0); + } + + /* Even if the match is empty, we need to reset the control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); + + if (!zero) + { + if (framesize < 0) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + } + } + + JUMPTO(SLJIT_JUMP, loop); + flush_stubs(common); + + compile_backtrackingpath(common, backtrack->top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + set_jumps(backtrack->topbacktracks, LABEL()); + + if (framesize < 0) + { + if (offset != 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + else + { + if (offset != 0) + { + /* Last alternative. */ + if (*cc == OP_KETRPOS) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2)); + } + } + + if (*cc == OP_KETRPOS) + break; + ccbegin = cc + 1 + LINK_SIZE; + } + +/* We don't have to restore the control head in case of a failed match. */ + +backtrack->topbacktracks = NULL; +if (!zero) + { + if (framesize < 0) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + else /* TMP2 is set to [private_data_ptr] above. */ + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); + } + +/* None of them matched. */ +set_jumps(emptymatch, LABEL()); +count_match(common); +return cc + 1 + LINK_SIZE; +} + +static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end) +{ +int class_len; + +*opcode = *cc; +*exact = 0; + +if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) + { + cc++; + *type = OP_CHAR; + } +else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) + { + cc++; + *type = OP_CHARI; + *opcode -= OP_STARI - OP_STAR; + } +else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) + { + cc++; + *type = OP_NOT; + *opcode -= OP_NOTSTAR - OP_STAR; + } +else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) + { + cc++; + *type = OP_NOTI; + *opcode -= OP_NOTSTARI - OP_STAR; + } +else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) + { + cc++; + *opcode -= OP_TYPESTAR - OP_STAR; + *type = OP_END; + } +else + { + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); + *type = *opcode; + cc++; + class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0); + *opcode = cc[class_len - 1]; + + if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) + { + *opcode -= OP_CRSTAR - OP_STAR; + *end = cc + class_len; + + if (*opcode == OP_PLUS || *opcode == OP_MINPLUS) + { + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + } + } + else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) + { + *opcode -= OP_CRPOSSTAR - OP_POSSTAR; + *end = cc + class_len; + + if (*opcode == OP_POSPLUS) + { + *exact = 1; + *opcode = OP_POSSTAR; + } + } + else + { + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *exact = GET2(cc, class_len); + + if (*max == 0) + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSSTAR; + else + *opcode -= OP_CRRANGE - OP_STAR; + } + else + { + *max -= *exact; + if (*max == 0) + *opcode = OP_EXACT; + else if (*max == 1) + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSQUERY; + else + *opcode -= OP_CRRANGE - OP_QUERY; + } + else + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else + *opcode -= OP_CRRANGE - OP_UPTO; + } + } + *end = cc + class_len + 2 * IMM2_SIZE; + } + return cc; + } + +switch(*opcode) + { + case OP_EXACT: + *exact = GET2(cc, 0); + cc += IMM2_SIZE; + break; + + case OP_PLUS: + case OP_MINPLUS: + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + break; + + case OP_POSPLUS: + *exact = 1; + *opcode = OP_POSSTAR; + break; + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + *max = GET2(cc, 0); + cc += IMM2_SIZE; + break; + } + +if (*type == OP_END) + { + *type = *cc; + *end = next_opcode(common, cc); + cc++; + return cc; + } + +*end = cc + 1; +#ifdef SUPPORT_UNICODE +if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); +#endif +return cc; +} + +static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode; +PCRE2_UCHAR type; +sljit_u32 max = 0, exact; +sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1); +sljit_s32 early_fail_type; +BOOL charpos_enabled; +PCRE2_UCHAR charpos_char; +unsigned int charpos_othercasebit; +PCRE2_SPTR end; +jump_list *no_match = NULL; +jump_list *no_char1_match = NULL; +struct sljit_jump *jump = NULL; +struct sljit_label *label; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int tmp_base, tmp_offset; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +BOOL use_tmp; +#endif + +PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); + +early_fail_type = (early_fail_ptr & 0x7); +early_fail_ptr >>= 3; + +/* During recursion, these optimizations are disabled. */ +if (common->early_fail_start_ptr == 0) + { + early_fail_ptr = 0; + early_fail_type = type_skip; + } + +SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 + || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); + +if (early_fail_type == type_fail) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); + +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); + +if (type != OP_EXTUNI) + { + tmp_base = TMP3; + tmp_offset = 0; + } +else + { + tmp_base = SLJIT_MEM1(SLJIT_SP); + tmp_offset = POSSESSIVE0; + } + +/* Handle fixed part first. */ +if (exact > 1) + { + SLJIT_ASSERT(early_fail_ptr == 0); + + if (common->mode == PCRE2_JIT_COMPLETE +#ifdef SUPPORT_UNICODE + && !common->utf +#endif + && type != OP_ANYNL && type != OP_EXTUNI) + { + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + } +else if (exact == 1) + { + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + + if (early_fail_type == type_fail_range) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0); + } + } + +switch(opcode) + { + case OP_STAR: + case OP_UPTO: + SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR); + + if (type == OP_ANYNL || type == OP_EXTUNI) + { + SLJIT_ASSERT(private_data_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); + + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); + + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + if (opcode == OP_UPTO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + jump = JUMP(SLJIT_ZERO); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); + } + + /* We cannot use TMP3 because of allocate_stack. */ + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + if (jump != NULL) + JUMPHERE(jump); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } +#ifdef SUPPORT_UNICODE + else if (type == OP_ALLANY && !common->invalid_utf) +#else + else if (type == OP_ALLANY) +#endif + { + if (opcode == OP_STAR) + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset0, STR_END, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } +#ifdef SUPPORT_UNICODE + else if (!common->utf) +#else + else +#endif + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } + else + { + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } + } + + charpos_enabled = FALSE; + charpos_char = 0; + charpos_othercasebit = 0; + + if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) + { +#ifdef SUPPORT_UNICODE + charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); +#else + charpos_enabled = TRUE; +#endif + if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) + { + charpos_othercasebit = char_get_othercase_bit(common, end + 1); + if (charpos_othercasebit == 0) + charpos_enabled = FALSE; + } + + if (charpos_enabled) + { + charpos_char = end[1]; + /* Consume the OP_CHAR opcode. */ + end += 2; +#if PCRE2_CODE_UNIT_WIDTH == 8 + SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); + if ((charpos_othercasebit & 0x100) != 0) + charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; +#endif + if (charpos_othercasebit != 0) + charpos_char |= charpos_othercasebit; + + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; + } + } + + if (charpos_enabled) + { + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + + /* Search the first instance of charpos_char. */ + jump = JUMP(SLJIT_JUMP); + label = LABEL(); + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); + } + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + JUMPHERE(jump); + + detect_partial_match(common, &backtrack->topbacktracks); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + + /* Search the last instance of charpos_char. */ + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + detect_partial_match(common, &no_match); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + + if (opcode == OP_STAR) + { + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); + } + else + { + jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + set_jumps(no_match, LABEL()); + OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR); + SLJIT_ASSERT(!use_tmp || tmp_base == TMP3); + + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); +#endif + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); +#endif + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + set_jumps(no_match, LABEL()); + if (use_tmp) + { + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); + } + else + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + } + else +#endif + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + } + + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_MINSTAR: + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + break; + + case OP_MINUPTO: + SLJIT_ASSERT(early_fail_ptr == 0); + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_QUERY: + case OP_MINQUERY: + SLJIT_ASSERT(early_fail_ptr == 0); + if (private_data_ptr == 0) + allocate_stack(common, 1); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (opcode == OP_QUERY) + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + + case OP_EXACT: + break; + + case OP_POSSTAR: +#if defined SUPPORT_UNICODE + if (type == OP_ALLANY && !common->invalid_utf) +#else + if (type == OP_ALLANY) +#endif + { + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + break; + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match_to(common, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + if (early_fail_ptr != 0) + { + if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + } + break; + } +#endif + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + break; + + case OP_POSUPTO: + SLJIT_ASSERT(early_fail_ptr == 0); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + break; + } +#endif + + if (type == OP_ALLANY) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) + { + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } + else + { + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + break; + } + + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + break; + + case OP_POSQUERY: + SLJIT_ASSERT(early_fail_ptr == 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +count_match(common); +return end; +} + +static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +if (*cc == OP_FAIL) + { + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); + return cc + 1; + } + +if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + +if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) + { + /* No need to check notempty conditions. */ + if (common->accept_label == NULL) + add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->accept_label); + return cc + 1; + } + +if (common->accept_label == NULL) + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); +else + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + } +else + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); + +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); +OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); +if (common->accept_label == NULL) + add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO)); +else + JUMPTO(SLJIT_ZERO, common->accept_label); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); +if (common->accept_label == NULL) + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); +else + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); +return cc + 1; +} + +static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc) +{ +DEFINE_COMPILER; +int offset = GET2(cc, 1); +BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; + +/* Data will be discarded anyway... */ +if (common->currententry != NULL) + return cc + 1 + IMM2_SIZE; + +if (!optimized_cbracket) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); +offset <<= 1; +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); +if (!optimized_cbracket) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); +return cc + 1 + IMM2_SIZE; +} + +static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +PCRE2_UCHAR opcode = *cc; +PCRE2_SPTR ccend = cc + 1; + +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || + opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) + ccend += 2 + cc[1]; + +PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); + +if (opcode == OP_SKIP) + { + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + return ccend; + } + +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) + { + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + } + +return ccend; +} + +static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP }; + +static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL needs_control_head; +int size; + +PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); +common->then_trap = BACKTRACK_AS(then_trap_backtrack); +BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; +BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start); +BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); +allocate_stack(common, size); +if (size > 3) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); +else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); + +size = BACKTRACK_AS(then_trap_backtrack)->framesize; +if (size >= 0) + init_frame(common, cc, ccend, size - 1, 0); +} + +static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack; +BOOL has_then_trap = FALSE; +then_trap_backtrack *save_then_trap = NULL; + +SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); + +if (common->has_then && common->then_offsets[cc - common->start] != 0) + { + SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0); + has_then_trap = TRUE; + save_then_trap = common->then_trap; + /* Tail item on backtrack. */ + compile_then_trap_matchingpath(common, cc, ccend, parent); + } + +while (cc < ccend) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_DOLL: + case OP_DOLLM: + case OP_CIRC: + case OP_CIRCM: + case OP_REVERSE: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + break; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOTPROP: + case OP_PROP: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EXTUNI: + case OP_NOT: + case OP_NOTI: + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + break; + + case OP_SET_SOM: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + cc++; + break; + + case OP_CHAR: + case OP_CHARI: + if (common->mode == PCRE2_JIT_COMPLETE) + cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + break; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + cc = compile_iterator_matchingpath(common, cc, parent); + break; + + case OP_CLASS: + case OP_NCLASS: + if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + break; + +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + case OP_XCLASS: + if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) + cc = compile_iterator_matchingpath(common, cc, parent); + else + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + break; +#endif + + case OP_REF: + case OP_REFI: + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + IMM2_SIZE; + } + break; + + case OP_DNREF: + case OP_DNREFI: + if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + 2 * IMM2_SIZE; + } + break; + + case OP_RECURSE: + cc = compile_recurse_matchingpath(common, cc, parent); + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + cc = compile_callout_matchingpath(common, cc, parent); + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); + break; + + case OP_BRAMINZERO: + PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); + cc = bracketend(cc + 1); + if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) + { + allocate_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + } + else + { + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); + } + BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); + count_match(common); + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_CBRA: + case OP_COND: + case OP_SBRA: + case OP_SCBRA: + case OP_SCOND: + cc = compile_bracket_matchingpath(common, cc, parent); + break; + + case OP_BRAZERO: + if (cc[1] > OP_ASSERTBACK_NOT) + cc = compile_bracket_matchingpath(common, cc, parent); + else + { + PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); + cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); + } + break; + + case OP_BRAPOS: + case OP_CBRAPOS: + case OP_SBRAPOS: + case OP_SCBRAPOS: + case OP_BRAPOSZERO: + cc = compile_bracketpos_matchingpath(common, cc, parent); + break; + + case OP_MARK: + PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); + SLJIT_ASSERT(common->mark_ptr != 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + allocate_stack(common, common->has_skip_arg ? 5 : 1); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + if (common->has_skip_arg) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); + } + cc += 1 + 2 + cc[1]; + break; + + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + case OP_THEN: + case OP_THEN_ARG: + case OP_COMMIT: + case OP_COMMIT_ARG: + cc = compile_control_verb_matchingpath(common, cc, parent); + break; + + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + cc = compile_fail_accept_matchingpath(common, cc, parent); + break; + + case OP_CLOSE: + cc = compile_close_matchingpath(common, cc); + break; + + case OP_SKIPZERO: + cc = bracketend(cc + 1); + break; + + default: + SLJIT_UNREACHABLE(); + return; + } + if (cc == NULL) + return; + } + +if (has_then_trap) + { + /* Head item on backtrack. */ + PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); + BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; + BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap; + common->then_trap = save_then_trap; + } +SLJIT_ASSERT(cc == ccend); +} + +#undef PUSH_BACKTRACK +#undef PUSH_BACKTRACK_NOVALUE +#undef BACKTRACK_AS + +#define COMPILE_BACKTRACKINGPATH(current) \ + do \ + { \ + compile_backtrackingpath(common, (current)); \ + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ + return; \ + } \ + while (0) + +#define CURRENT_AS(type) ((type *)current) + +static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +PCRE2_UCHAR opcode; +PCRE2_UCHAR type; +sljit_u32 max = 0, exact; +struct sljit_label *label = NULL; +struct sljit_jump *jump = NULL; +jump_list *jumplist = NULL; +PCRE2_SPTR end; +int private_data_ptr = PRIVATE_DATA(cc); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); +int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); + +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); + +switch(opcode) + { + case OP_STAR: + case OP_UPTO: + if (type == OP_ANYNL || type == OP_EXTUNI) + { + SLJIT_ASSERT(private_data_ptr == 0); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + } + else + { + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) + { + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + label = LABEL(); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); + move_back(common, NULL, TRUE); + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); + } + else + { + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + move_back(common, NULL, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + } + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 2); + } + break; + + case OP_MINSTAR: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_MINUPTO: + OP1(SLJIT_MOV, TMP1, 0, base, offset1); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); + + OP1(SLJIT_MOV, base, offset1, TMP1, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + + set_jumps(jumplist, LABEL()); + if (private_data_ptr == 0) + free_stack(common, 2); + break; + + case OP_QUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); + jump = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_MINQUERY: + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); + jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); + set_jumps(jumplist, LABEL()); + JUMPHERE(jump); + if (private_data_ptr == 0) + free_stack(common, 1); + break; + + case OP_EXACT: + case OP_POSSTAR: + case OP_POSQUERY: + case OP_POSUPTO: + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + +set_jumps(current->topbacktracks, LABEL()); +} + +static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +PCRE2_UCHAR type; + +type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; + +if ((type & 0x1) == 0) + { + /* Maximize case. */ + set_jumps(current->topbacktracks, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); + return; + } + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); +set_jumps(current->topbacktracks, LABEL()); +free_stack(common, ref ? 2 : 3); +} + +static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +recurse_entry *entry; + +if (!CURRENT_AS(recurse_backtrack)->inlined_pattern) + { + entry = CURRENT_AS(recurse_backtrack)->entry; + if (entry->backtrack_label == NULL) + add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL)); + else + JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath); + } +else + compile_backtrackingpath(common, current->top); + +set_jumps(current->topbacktracks, LABEL()); +} + +static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = current->cc; +PCRE2_UCHAR bra = OP_BRA; +struct sljit_jump *brajump = NULL; + +SLJIT_ASSERT(*cc != OP_BRAMINZERO); +if (*cc == OP_BRAZERO) + { + bra = *cc; + cc++; + } + +if (bra == OP_BRAZERO) + { + SLJIT_ASSERT(current->topbacktracks == NULL); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + +if (CURRENT_AS(assert_backtrack)->framesize < 0) + { + set_jumps(current->topbacktracks, LABEL()); + + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); + free_stack(common, 1); + } + return; + } + +if (bra == OP_BRAZERO) + { + if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); + free_stack(common, 1); + return; + } + free_stack(common, 1); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + } + +if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0); + + set_jumps(current->topbacktracks, LABEL()); + } +else + set_jumps(current->topbacktracks, LABEL()); + +if (bra == OP_BRAZERO) + { + /* We know there is enough place on the stack. */ + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); + JUMPHERE(brajump); + } +} + +static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +int opcode, stacksize, alt_count, alt_max; +int offset = 0; +int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; +int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; +PCRE2_SPTR cc = current->cc; +PCRE2_SPTR ccbegin; +PCRE2_SPTR ccprev; +PCRE2_UCHAR bra = OP_BRA; +PCRE2_UCHAR ket; +assert_backtrack *assert; +BOOL has_alternatives; +BOOL needs_control_head = FALSE; +struct sljit_jump *brazero = NULL; +struct sljit_jump *next_alt = NULL; +struct sljit_jump *once = NULL; +struct sljit_jump *cond = NULL; +struct sljit_label *rmin_label = NULL; +struct sljit_label *exact_label = NULL; +struct sljit_put_label *put_label = NULL; + +if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) + { + bra = *cc; + cc++; + } + +opcode = *cc; +ccbegin = bracketend(cc) - 1 - LINK_SIZE; +ket = *ccbegin; +if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) + { + repeat_ptr = PRIVATE_DATA(ccbegin); + repeat_type = PRIVATE_DATA(ccbegin + 2); + repeat_count = PRIVATE_DATA(ccbegin + 3); + SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); + if (repeat_type == OP_UPTO) + ket = OP_KETRMAX; + if (repeat_type == OP_MINUPTO) + ket = OP_KETRMIN; + } +ccbegin = cc; +cc += GET(cc, 1); +has_alternatives = *cc == OP_ALT; +if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; +if (opcode == OP_CBRA || opcode == OP_SCBRA) + offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; +if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) + opcode = OP_SCOND; + +alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; + +/* Decoding the needs_control_head in framesize. */ +if (opcode == OP_ONCE) + { + needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; + CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; + } + +if (ket != OP_KET && repeat_type != 0) + { + /* TMP1 is used in OP_KETRMIN below. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + if (repeat_type == OP_UPTO) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); + else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); + } + +if (ket == OP_KETRMAX) + { + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + } +else if (ket == OP_KETRMIN) + { + if (bra != OP_BRAMINZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (repeat_type != 0) + { + /* TMP1 was set a few lines above. */ + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + /* Drop STR_PTR for non-greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else if (opcode >= OP_SBRA || opcode == OP_ONCE) + { + /* Checking zero-length iteration. */ + if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + } + /* Drop STR_PTR for non-greedy plus quantifier. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + } + else + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + } + rmin_label = LABEL(); + if (repeat_type != 0) + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + } +else if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } +else if (repeat_type == OP_EXACT) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + exact_label = LABEL(); + } + +if (offset != 0) + { + if (common->capture_last_ptr != 0) + { + SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + free_stack(common, 3); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); + } + else if (common->optimized_cbracket[offset >> 1] == 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + } + } + +if (SLJIT_UNLIKELY(opcode == OP_ONCE)) + { + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw)); + } + once = JUMP(SLJIT_JUMP); + } +else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + { + if (has_alternatives) + { + /* Always exactly one alternative. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + + alt_max = 2; + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + } +else if (has_alternatives) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + + if (alt_max > 3) + { + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label); + sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + +COMPILE_BACKTRACKINGPATH(current->top); +if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); + +if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) + { + /* Conditional block always has at most one alternative. */ + if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) + { + SLJIT_ASSERT(has_alternatives); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } + cond = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); + } + else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) + { + SLJIT_ASSERT(has_alternatives); + cond = JUMP(SLJIT_JUMP); + set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); + } + else + SLJIT_ASSERT(!has_alternatives); + } + +if (has_alternatives) + { + alt_count = 1; + do + { + current->top = NULL; + current->topbacktracks = NULL; + current->nextbacktracks = NULL; + /* Conditional blocks always have an additional alternative, even if it is empty. */ + if (*cc == OP_ALT) + { + ccprev = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + if (opcode != OP_COND && opcode != OP_SCOND) + { + if (opcode != OP_ONCE) + { + if (private_data_ptr != 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + } + else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); + } + compile_matchingpath(common, ccprev, cc, current); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + + if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (opcode == OP_SCRIPT_RUN) + match_script_run_common(common, private_data_ptr, current); + } + + /* Instructions after the current alternative is successfully matched. */ + /* There is a similar code in compile_bracket_matchingpath. */ + if (opcode == OP_ONCE) + match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + + stacksize = 0; + if (repeat_type == OP_MINUPTO) + { + /* We need to preserve the counter. TMP2 will be used below. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); + stacksize++; + } + if (ket != OP_KET || bra != OP_BRA) + stacksize++; + if (offset != 0) + { + if (common->capture_last_ptr != 0) + stacksize++; + if (common->optimized_cbracket[offset >> 1] == 0) + stacksize += 2; + } + if (opcode != OP_ONCE) + stacksize++; + + if (stacksize > 0) + allocate_stack(common, stacksize); + + stacksize = 0; + if (repeat_type == OP_MINUPTO) + { + /* TMP2 was set above. */ + OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); + stacksize++; + } + + if (ket != OP_KET || bra != OP_BRA) + { + if (ket != OP_KET) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + stacksize++; + } + + if (offset != 0) + stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); + + if (opcode != OP_ONCE) + { + if (alt_max <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); + else + put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } + + if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) + { + /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */ + SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + } + + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); + + if (opcode != OP_ONCE) + { + if (alt_max <= 3) + { + JUMPHERE(next_alt); + alt_count++; + if (alt_count < alt_max) + { + SLJIT_ASSERT(alt_count == 2 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); + } + } + else + { + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + } + + COMPILE_BACKTRACKINGPATH(current->top); + if (current->topbacktracks) + set_jumps(current->topbacktracks, LABEL()); + SLJIT_ASSERT(!current->nextbacktracks); + } + while (*cc == OP_ALT); + + if (cond != NULL) + { + SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); + assert = CURRENT_AS(bracket_backtrack)->u.assert; + if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) + { + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0); + } + JUMPHERE(cond); + } + + /* Free the STR_PTR. */ + if (private_data_ptr == 0) + free_stack(common, 1); + } + +if (offset != 0) + { + /* Using both tmp register is better for instruction scheduling. */ + if (common->optimized_cbracket[offset >> 1] != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + } +else if (opcode == OP_ONCE) + { + cc = ccbegin + GET(ccbegin, 1); + stacksize = needs_control_head ? 1 : 0; + + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + { + /* Reset head and drop saved frame. */ + stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); + } + else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) + { + /* The STR_PTR must be released. */ + stacksize++; + } + + if (stacksize > 0) + free_stack(common, stacksize); + + JUMPHERE(once); + /* Restore previous private_data_ptr */ + if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1)); + else if (ket == OP_KETRMIN) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + /* See the comment below. */ + free_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + } + } + +if (repeat_type == OP_EXACT) + { + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); + CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); + } +else if (ket == OP_KETRMAX) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra != OP_BRAZERO) + free_stack(common, 1); + + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); + JUMPHERE(brazero); + free_stack(common, 1); + } + } +else if (ket == OP_KETRMIN) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + /* OP_ONCE removes everything in case of a backtrack, so we don't + need to explicitly release the STR_PTR. The extra release would + affect badly the free_stack(2) above. */ + if (opcode != OP_ONCE) + free_stack(common, 1); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); + if (opcode == OP_ONCE) + free_stack(common, bra == OP_BRAMINZERO ? 2 : 1); + else if (bra == OP_BRAMINZERO) + free_stack(common, 1); + } +else if (bra == OP_BRAZERO) + { + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); + JUMPHERE(brazero); + } +} + +static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +int offset; +struct sljit_jump *jump; + +if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) + { + if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) + { + offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); + if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); + } + set_jumps(current->topbacktracks, LABEL()); + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); + return; + } + +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); +add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw)); + +if (current->topbacktracks) + { + jump = JUMP(SLJIT_JUMP); + set_jumps(current->topbacktracks, LABEL()); + /* Drop the stack frame. */ + free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); + JUMPHERE(jump); + } +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1)); +} + +static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +assert_backtrack backtrack; + +current->top = NULL; +current->topbacktracks = NULL; +current->nextbacktracks = NULL; +if (current->cc[1] > OP_ASSERTBACK_NOT) + { + /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ + compile_bracket_matchingpath(common, current->cc, current); + compile_bracket_backtrackingpath(common, current->top); + } +else + { + memset(&backtrack, 0, sizeof(backtrack)); + backtrack.common.cc = current->cc; + backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath; + /* Manual call of compile_assert_matchingpath. */ + compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); + } +SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); +} + +static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +PCRE2_UCHAR opcode = *current->cc; +struct sljit_label *loop; +struct sljit_jump *jump; + +if (opcode == OP_THEN || opcode == OP_THEN_ARG) + { + if (common->then_trap != NULL) + { + SLJIT_ASSERT(common->control_head_ptr != 0); + + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start); + jump = JUMP(SLJIT_JUMP); + + loop = LABEL(); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + JUMPHERE(jump); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop); + add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP)); + return; + } + else if (!common->local_quit_available && common->in_positive_assertion) + { + add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP)); + return; + } + } + +if (common->local_quit_available) + { + /* Abort match with a fail. */ + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + return; + } + +if (opcode == OP_SKIP_ARG) + { + SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); + return; + } + +if (opcode == OP_SKIP) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +else + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0); +add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); +} + +static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +int size; + +if (CURRENT_AS(then_trap_backtrack)->then_trap) + { + common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap; + return; + } + +size = CURRENT_AS(then_trap_backtrack)->framesize; +size = 3 + (size < 0 ? 0 : size); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3)); +free_stack(common, size); +jump = JUMP(SLJIT_JUMP); + +set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL()); +/* STACK_TOP is set by THEN. */ +if (CURRENT_AS(then_trap_backtrack)->framesize >= 0) + { + add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw)); + } +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); +free_stack(common, 3); + +JUMPHERE(jump); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); +} + +static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +then_trap_backtrack *save_then_trap = common->then_trap; + +while (current) + { + if (current->nextbacktracks != NULL) + set_jumps(current->nextbacktracks, LABEL()); + switch(*current->cc) + { + case OP_SET_SOM: + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0); + break; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEEXACT: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSUPTO: + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: +#endif + compile_iterator_backtrackingpath(common, current); + break; + + case OP_REF: + case OP_REFI: + case OP_DNREF: + case OP_DNREFI: + compile_ref_iterator_backtrackingpath(common, current); + break; + + case OP_RECURSE: + compile_recurse_backtrackingpath(common, current); + break; + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + compile_assert_backtrackingpath(common, current); + break; + + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_BRA: + case OP_CBRA: + case OP_COND: + case OP_SBRA: + case OP_SCBRA: + case OP_SCOND: + compile_bracket_backtrackingpath(common, current); + break; + + case OP_BRAZERO: + if (current->cc[1] > OP_ASSERTBACK_NOT) + compile_bracket_backtrackingpath(common, current); + else + compile_assert_backtrackingpath(common, current); + break; + + case OP_BRAPOS: + case OP_CBRAPOS: + case OP_SBRAPOS: + case OP_SCBRAPOS: + case OP_BRAPOSZERO: + compile_bracketpos_backtrackingpath(common, current); + break; + + case OP_BRAMINZERO: + compile_braminzero_backtrackingpath(common, current); + break; + + case OP_MARK: + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0)); + if (common->has_skip_arg) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + free_stack(common, common->has_skip_arg ? 5 : 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); + if (common->has_skip_arg) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); + break; + + case OP_THEN: + case OP_THEN_ARG: + case OP_PRUNE: + case OP_PRUNE_ARG: + case OP_SKIP: + case OP_SKIP_ARG: + compile_control_verb_backtrackingpath(common, current); + break; + + case OP_COMMIT: + case OP_COMMIT_ARG: + if (!common->local_quit_available) + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + if (common->quit_label == NULL) + add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP)); + else + JUMPTO(SLJIT_JUMP, common->quit_label); + break; + + case OP_CALLOUT: + case OP_CALLOUT_STR: + case OP_FAIL: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: + set_jumps(current->topbacktracks, LABEL()); + break; + + case OP_THEN_TRAP: + /* A virtual opcode for then traps. */ + compile_then_trap_backtrackingpath(common, current); + break; + + default: + SLJIT_UNREACHABLE(); + break; + } + current = current->prev; + } +common->then_trap = save_then_trap; +} + +static SLJIT_INLINE void compile_recurse(compiler_common *common) +{ +DEFINE_COMPILER; +PCRE2_SPTR cc = common->start + common->currententry->start; +PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); +PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); +BOOL needs_control_head; +BOOL has_quit; +BOOL has_accept; +int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept); +int alt_count, alt_max, local_size; +backtrack_common altbacktrack; +jump_list *match = NULL; +struct sljit_jump *next_alt = NULL; +struct sljit_jump *accept_exit = NULL; +struct sljit_label *quit; +struct sljit_put_label *put_label = NULL; + +/* Recurse captures then. */ +common->then_trap = NULL; + +SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS); + +alt_max = no_alternatives(cc); +alt_count = 0; + +/* Matching path. */ +SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0); +common->currententry->entry_label = LABEL(); +set_jumps(common->currententry->entry_calls, common->currententry->entry_label); + +sljit_emit_fast_enter(compiler, TMP2, 0); +count_match(common); + +local_size = (alt_max > 1) ? 2 : 1; + +/* (Reversed) stack layout: + [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */ + +allocate_stack(common, private_data_size + local_size); +/* Save return address. */ +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0); + +copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit); + +/* This variable is saved and restored all time when we enter or exit from a recursive context. */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); + +if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + +if (alt_max > 1) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + +memset(&altbacktrack, 0, sizeof(backtrack_common)); +common->quit_label = NULL; +common->accept_label = NULL; +common->quit = NULL; +common->accept = NULL; +altbacktrack.cc = ccbegin; +cc += GET(cc, 1); +while (1) + { + altbacktrack.top = NULL; + altbacktrack.topbacktracks = NULL; + + if (altbacktrack.cc != ccbegin) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + + compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + + allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + + if (alt_max > 1 || has_accept) + { + if (alt_max > 3) + put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); + } + + add_jump(compiler, &match, JUMP(SLJIT_JUMP)); + + if (alt_count == 0) + { + /* Backtracking path entry. */ + SLJIT_ASSERT(common->currententry->backtrack_label == NULL); + common->currententry->backtrack_label = LABEL(); + set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label); + + sljit_emit_fast_enter(compiler, TMP1, 0); + + if (has_accept) + accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + /* Save return address. */ + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0); + + copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); + + if (alt_max > 1) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + free_stack(common, 2); + + if (alt_max > 3) + { + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + } + else + free_stack(common, has_accept ? 2 : 1); + } + else if (alt_max > 3) + { + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } + else + { + JUMPHERE(next_alt); + if (alt_count + 1 < alt_max) + { + SLJIT_ASSERT(alt_count == 1 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); + } + } + + alt_count++; + + compile_backtrackingpath(common, altbacktrack.top); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return; + set_jumps(altbacktrack.topbacktracks, LABEL()); + + if (*cc != OP_ALT) + break; + + altbacktrack.cc = cc + 1 + LINK_SIZE; + cc += GET(cc, 1); + } + +/* No alternative is matched. */ + +quit = LABEL(); + +copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); +free_stack(common, private_data_size + local_size); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); + +if (common->quit != NULL) + { + SLJIT_ASSERT(has_quit); + + set_jumps(common->quit, LABEL()); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit); + JUMPTO(SLJIT_JUMP, quit); + } + +if (has_accept) + { + JUMPHERE(accept_exit); + free_stack(common, 2); + + /* Save return address. */ + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0); + + copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit); + + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); + free_stack(common, private_data_size + local_size); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); + } + +if (common->accept != NULL) + { + SLJIT_ASSERT(has_accept); + + set_jumps(common->accept, LABEL()); + + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); + + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); + } + +set_jumps(match, LABEL()); + +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + +copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); +OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); +} + +#undef COMPILE_BACKTRACKINGPATH +#undef CURRENT_AS + +#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \ + (PCRE2_JIT_INVALID_UTF) + +static int jit_compile(pcre2_code *code, sljit_u32 mode) +{ +pcre2_real_code *re = (pcre2_real_code *)code; +struct sljit_compiler *compiler; +backtrack_common rootbacktrack; +compiler_common common_data; +compiler_common *common = &common_data; +const sljit_u8 *tables = re->tables; +void *allocator_data = &re->memctl; +int private_data_size; +PCRE2_SPTR ccend; +executable_functions *functions; +void *executable_func; +sljit_uw executable_size; +sljit_uw total_length; +struct sljit_label *mainloop_label = NULL; +struct sljit_label *continue_match_label; +struct sljit_label *empty_match_found_label = NULL; +struct sljit_label *empty_match_backtrack_label = NULL; +struct sljit_label *reset_match_label; +struct sljit_label *quit_label; +struct sljit_jump *jump; +struct sljit_jump *minlength_check_failed = NULL; +struct sljit_jump *empty_match = NULL; +struct sljit_jump *end_anchor_failed = NULL; +jump_list *reqcu_not_found = NULL; + +SLJIT_ASSERT(tables); + +#if HAS_VIRTUAL_REGISTERS == 1 +SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0); +#elif HAS_VIRTUAL_REGISTERS == 0 +SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0); +#else +#error "Invalid value for HAS_VIRTUAL_REGISTERS" +#endif + +memset(&rootbacktrack, 0, sizeof(backtrack_common)); +memset(common, 0, sizeof(compiler_common)); +common->re = re; +common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); +rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size; + +#ifdef SUPPORT_UNICODE +common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0; +#endif /* SUPPORT_UNICODE */ +mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS; + +common->start = rootbacktrack.cc; +common->read_only_data_head = NULL; +common->fcc = tables + fcc_offset; +common->lcc = (sljit_sw)(tables + lcc_offset); +common->mode = mode; +common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY); +common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY); +common->nltype = NLTYPE_FIXED; +switch(re->newline_convention) + { + case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break; + case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break; + case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; + case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; + case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; + case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break; + default: return PCRE2_ERROR_INTERNAL; + } +common->nlmax = READ_CHAR_MAX; +common->nlmin = 0; +if (re->bsr_convention == PCRE2_BSR_UNICODE) + common->bsr_nltype = NLTYPE_ANY; +else if (re->bsr_convention == PCRE2_BSR_ANYCRLF) + common->bsr_nltype = NLTYPE_ANYCRLF; +else + { +#ifdef BSR_ANYCRLF + common->bsr_nltype = NLTYPE_ANYCRLF; +#else + common->bsr_nltype = NLTYPE_ANY; +#endif + } +common->bsr_nlmax = READ_CHAR_MAX; +common->bsr_nlmin = 0; +common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0; +common->ctypes = (sljit_sw)(tables + ctypes_offset); +common->name_count = re->name_count; +common->name_entry_size = re->name_entry_size; +common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0; +common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; +#ifdef SUPPORT_UNICODE +/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ +common->utf = (re->overall_options & PCRE2_UTF) != 0; +common->ucp = (re->overall_options & PCRE2_UCP) != 0; +if (common->utf) + { + if (common->nltype == NLTYPE_ANY) + common->nlmax = 0x2029; + else if (common->nltype == NLTYPE_ANYCRLF) + common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + else + { + /* We only care about the first newline character. */ + common->nlmax = common->newline & 0xff; + } + + if (common->nltype == NLTYPE_FIXED) + common->nlmin = common->newline & 0xff; + else + common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + + if (common->bsr_nltype == NLTYPE_ANY) + common->bsr_nlmax = 0x2029; + else + common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + } +else + common->invalid_utf = FALSE; +#endif /* SUPPORT_UNICODE */ +ccend = bracketend(common->start); + +/* Calculate the local space size on the stack. */ +common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); +common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data); +if (!common->optimized_cbracket) + return PCRE2_ERROR_NOMEMORY; +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 +memset(common->optimized_cbracket, 0, re->top_bracket + 1); +#else +memset(common->optimized_cbracket, 1, re->top_bracket + 1); +#endif + +SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); +#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 +common->capture_last_ptr = common->ovector_start; +common->ovector_start += sizeof(sljit_sw); +#endif +if (!check_opcode_types(common, common->start, ccend)) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +/* Checking flags and updating ovector_start. */ +if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + common->req_char_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (mode != PCRE2_JIT_COMPLETE) + { + common->start_used_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + common->hit_start = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + } +if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0) + { + common->match_end_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD +common->control_head_ptr = 1; +#endif +if (common->control_head_ptr != 0) + { + common->control_head_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } +if (common->has_set_som) + { + /* Saving the real start pointer is necessary. */ + common->start_ptr = common->ovector_start; + common->ovector_start += sizeof(sljit_sw); + } + +/* Aligning ovector to even number of sljit words. */ +if ((common->ovector_start & sizeof(sljit_sw)) != 0) + common->ovector_start += sizeof(sljit_sw); + +if (common->start_ptr == 0) + common->start_ptr = OVECTOR(0); + +/* Capturing brackets cannot be optimized if callouts are allowed. */ +if (common->capture_last_ptr != 0) + memset(common->optimized_cbracket, 0, re->top_bracket + 1); + +SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); +common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); + +total_length = ccend - common->start; +common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); +if (!common->private_data_ptrs) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } +memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); + +private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); +set_private_data_ptrs(common, &private_data_size, ccend); +if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) + detect_early_fail(common, common->start, &private_data_size, 0, 0); + +SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); + +if (private_data_size > SLJIT_MAX_LOCAL_SIZE) + { + SLJIT_FREE(common->private_data_ptrs, allocator_data); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +if (common->has_then) + { + common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); + memset(common->then_offsets, 0, total_length); + set_then_offsets(common, common->start, NULL); + } + +compiler = sljit_create_compiler(allocator_data); +if (!compiler) + { + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } +common->compiler = compiler; + +/* Main pcre_jit_exec entry. */ +sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); + +/* Register init. */ +reset_ovector(common, (re->top_bracket + 1) * 2); +if (common->req_char_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); + +OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); +OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end)); +OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); + +if (common->early_fail_start_ptr < common->early_fail_end_ptr) + reset_early_fail(common); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); +if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); +if (common->control_head_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); + +/* Main part of the matching */ +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + mainloop_label = mainloop_entry(common); + continue_match_label = LABEL(); + /* Forward search if possible. */ + if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) + ; + else if ((re->flags & PCRE2_FIRSTSET) != 0) + fast_forward_first_char(common); + else if ((re->flags & PCRE2_STARTLINE) != 0) + fast_forward_newline(common); + else if ((re->flags & PCRE2_FIRSTMAPSET) != 0) + fast_forward_start_bits(common); + } + } +else + continue_match_label = LABEL(); + +if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) + { + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength)); + minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); + } +if (common->req_char_ptr != 0) + reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); + +/* Store the current STR_PTR in OVECTOR(0). */ +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); +/* Copy the limit of allowed recursions. */ +OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); +if (common->capture_last_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0); +if (common->fast_forward_bc_ptr != NULL) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0); + +if (common->start_ptr != OVECTOR(0)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); + +/* Copy the beginning of the string. */ +if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + JUMPHERE(jump); + } +else if (mode == PCRE2_JIT_PARTIAL_HARD) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + +compile_matchingpath(common, common->start, ccend, &rootbacktrack); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +if ((re->overall_options & PCRE2_ENDANCHORED) != 0) + end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0); + +if (common->might_be_empty) + { + empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + empty_match_found_label = LABEL(); + } + +common->accept_label = LABEL(); +if (common->accept != NULL) + set_jumps(common->accept, common->accept_label); + +/* This means we have a match. Update the ovector. */ +copy_ovector(common, re->top_bracket + 1); +common->quit_label = common->abort_label = LABEL(); +if (common->quit != NULL) + set_jumps(common->quit, common->quit_label); +if (common->abort != NULL) + set_jumps(common->abort, common->abort_label); +if (minlength_check_failed != NULL) + SET_LABEL(minlength_check_failed, common->abort_label); + +sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); +sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); + +if (common->failed_match != NULL) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + set_jumps(common->failed_match, LABEL()); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + JUMPTO(SLJIT_JUMP, common->abort_label); + } + +if ((re->overall_options & PCRE2_ENDANCHORED) != 0) + JUMPHERE(end_anchor_failed); + +if (mode != PCRE2_JIT_COMPLETE) + { + common->partialmatchlabel = LABEL(); + set_jumps(common->partialmatch, common->partialmatchlabel); + return_with_partial_match(common, common->quit_label); + } + +if (common->might_be_empty) + empty_match_backtrack_label = LABEL(); +compile_backtrackingpath(common, rootbacktrack.top); +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +SLJIT_ASSERT(rootbacktrack.prev == NULL); +reset_match_label = LABEL(); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + { + /* Update hit_start only in the first time. */ + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); + JUMPHERE(jump); + } + +/* Check we have remaining characters. */ +if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + } + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), + (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr); + +if ((re->overall_options & PCRE2_ANCHORED) == 0) + { + if (common->ff_newline_shortcut != NULL) + { + /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */ + if ((re->overall_options & PCRE2_FIRSTLINE) == 0) + { + if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP1, 0); + CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + else + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); + } + } + else + CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label); + } + +/* No more remaining characters. */ +if (reqcu_not_found != NULL) + set_jumps(reqcu_not_found, LABEL()); + +if (mode == PCRE2_JIT_PARTIAL_SOFT) + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); + +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); +JUMPTO(SLJIT_JUMP, common->quit_label); + +flush_stubs(common); + +if (common->might_be_empty) + { + JUMPHERE(empty_match); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); + JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); + OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + JUMPTO(SLJIT_ZERO, empty_match_found_label); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); + JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); + } + +common->fast_forward_bc_ptr = NULL; +common->early_fail_start_ptr = 0; +common->early_fail_end_ptr = 0; +common->currententry = common->entries; +common->local_quit_available = TRUE; +quit_label = common->quit_label; +while (common->currententry != NULL) + { + /* Might add new entries. */ + compile_recurse(common); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + { + sljit_free_compiler(compiler); + SLJIT_FREE(common->optimized_cbracket, allocator_data); + SLJIT_FREE(common->private_data_ptrs, allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + flush_stubs(common); + common->currententry = common->currententry->next; + } +common->local_quit_available = FALSE; +common->quit_label = quit_label; + +/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ +/* This is a (really) rare case. */ +set_jumps(common->stackalloc, LABEL()); +/* RETURN_ADDR is not a saved register. */ +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); + +SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); + +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); +OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); +OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); + +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); + +jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); +OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + +/* Allocation failed. */ +JUMPHERE(jump); +/* We break the return address cache here, but this is a really rare case. */ +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT); +JUMPTO(SLJIT_JUMP, common->quit_label); + +/* Call limit reached. */ +set_jumps(common->calllimit, LABEL()); +OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT); +JUMPTO(SLJIT_JUMP, common->quit_label); + +if (common->revertframes != NULL) + { + set_jumps(common->revertframes, LABEL()); + do_revertframes(common); + } +if (common->wordboundary != NULL) + { + set_jumps(common->wordboundary, LABEL()); + check_wordboundary(common); + } +if (common->anynewline != NULL) + { + set_jumps(common->anynewline, LABEL()); + check_anynewline(common); + } +if (common->hspace != NULL) + { + set_jumps(common->hspace, LABEL()); + check_hspace(common); + } +if (common->vspace != NULL) + { + set_jumps(common->vspace, LABEL()); + check_vspace(common); + } +if (common->casefulcmp != NULL) + { + set_jumps(common->casefulcmp, LABEL()); + do_casefulcmp(common); + } +if (common->caselesscmp != NULL) + { + set_jumps(common->caselesscmp, LABEL()); + do_caselesscmp(common); + } +if (common->reset_match != NULL) + { + set_jumps(common->reset_match, LABEL()); + do_reset_match(common, (re->top_bracket + 1) * 2); + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + JUMPTO(SLJIT_JUMP, reset_match_label); + } +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 +if (common->utfreadchar != NULL) + { + set_jumps(common->utfreadchar, LABEL()); + do_utfreadchar(common); + } +if (common->utfreadtype8 != NULL) + { + set_jumps(common->utfreadtype8, LABEL()); + do_utfreadtype8(common); + } +if (common->utfpeakcharback != NULL) + { + set_jumps(common->utfpeakcharback, LABEL()); + do_utfpeakcharback(common); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ +#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 +if (common->utfreadchar_invalid != NULL) + { + set_jumps(common->utfreadchar_invalid, LABEL()); + do_utfreadchar_invalid(common); + } +if (common->utfreadnewline_invalid != NULL) + { + set_jumps(common->utfreadnewline_invalid, LABEL()); + do_utfreadnewline_invalid(common); + } +if (common->utfmoveback_invalid) + { + set_jumps(common->utfmoveback_invalid, LABEL()); + do_utfmoveback_invalid(common); + } +if (common->utfpeakcharback_invalid) + { + set_jumps(common->utfpeakcharback_invalid, LABEL()); + do_utfpeakcharback_invalid(common); + } +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */ +if (common->getucd != NULL) + { + set_jumps(common->getucd, LABEL()); + do_getucd(common); + } +if (common->getucdtype != NULL) + { + set_jumps(common->getucdtype, LABEL()); + do_getucdtype(common); + } +#endif /* SUPPORT_UNICODE */ + +SLJIT_FREE(common->optimized_cbracket, allocator_data); +SLJIT_FREE(common->private_data_ptrs, allocator_data); + +executable_func = sljit_generate_code(compiler); +executable_size = sljit_get_generated_code_size(compiler); +sljit_free_compiler(compiler); + +if (executable_func == NULL) + { + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + +/* Reuse the function descriptor if possible. */ +if (re->executable_jit != NULL) + functions = (executable_functions *)re->executable_jit; +else + { + functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data); + if (functions == NULL) + { + /* This case is highly unlikely since we just recently + freed a lot of memory. Not impossible though. */ + sljit_free_code(executable_func); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); + return PCRE2_ERROR_NOMEMORY; + } + memset(functions, 0, sizeof(executable_functions)); + functions->top_bracket = re->top_bracket + 1; + functions->limit_match = re->limit_match; + re->executable_jit = functions; + } + +/* Turn mode into an index. */ +if (mode == PCRE2_JIT_COMPLETE) + mode = 0; +else + mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2; + +SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES); +functions->executable_funcs[mode] = executable_func; +functions->read_only_data_heads[mode] = common->read_only_data_head; +functions->executable_sizes[mode] = executable_size; +return 0; +} + +#endif + +/************************************************* +* JIT compile a Regular Expression * +*************************************************/ + +/* This function used JIT to convert a previously-compiled pattern into machine +code. + +Arguments: + code a compiled pattern + options JIT option bits + +Returns: 0: success or (*NOJIT) was used + <0: an error code +*/ + +#define PUBLIC_JIT_COMPILE_OPTIONS \ + (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF) + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_jit_compile(pcre2_code *code, uint32_t options) +{ +pcre2_real_code *re = (pcre2_real_code *)code; + +if (code == NULL) + return PCRE2_ERROR_NULL; + +if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0) + return PCRE2_ERROR_JIT_BADOPTION; + +/* Support for invalid UTF was first introduced in JIT, with the option +PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the +compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the +preferred feature, with the earlier option deprecated. However, for backward +compatibility, if the earlier option is set, it forces the new option so that +if JIT matching falls back to the interpreter, there is still support for +invalid UTF. However, if this function has already been successfully called +without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that +non-invalid-supporting JIT code was compiled), give an error. + +If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following +actions are needed: + + 1. Remove the definition from pcre2.h.in and from the list in + PUBLIC_JIT_COMPILE_OPTIONS above. + + 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module. + + 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c. + + 4. Delete the following short block of code. The setting of "re" and + "functions" can be moved into the JIT-only block below, but if that is + done, (void)re and (void)functions will be needed in the non-JIT case, to + avoid compiler warnings. +*/ + +#ifdef SUPPORT_JIT +executable_functions *functions = (executable_functions *)re->executable_jit; +static int executable_allocator_is_working = 0; +#endif + +if ((options & PCRE2_JIT_INVALID_UTF) != 0) + { + if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0) + { +#ifdef SUPPORT_JIT + if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION; +#endif + re->overall_options |= PCRE2_MATCH_INVALID_UTF; + } + } + +/* The above tests are run with and without JIT support. This means that +PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring +interpreter support) even in the absence of JIT. But now, if there is no JIT +support, give an error return. */ + +#ifndef SUPPORT_JIT +return PCRE2_ERROR_JIT_BADOPTION; +#else /* SUPPORT_JIT */ + +/* There is JIT support. Do the necessary. */ + +if ((re->flags & PCRE2_NOJIT) != 0) return 0; + +if (executable_allocator_is_working == 0) + { + /* Checks whether the executable allocator is working. This check + might run multiple times in multi-threaded environments, but the + result should not be affected by it. */ + void *ptr = SLJIT_MALLOC_EXEC(32); + + executable_allocator_is_working = -1; + + if (ptr != NULL) + { + SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr)); + executable_allocator_is_working = 1; + } + } + +if (executable_allocator_is_working < 0) + return PCRE2_ERROR_NOMEMORY; + +if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) + options |= PCRE2_JIT_INVALID_UTF; + +if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL + || functions->executable_funcs[0] == NULL)) { + uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); + if (result != 0) + return result; + } + +if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL + || functions->executable_funcs[1] == NULL)) { + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); + if (result != 0) + return result; + } + +if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL + || functions->executable_funcs[2] == NULL)) { + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); + int result = jit_compile(code, options & ~excluded_options); + if (result != 0) + return result; + } + +return 0; + +#endif /* SUPPORT_JIT */ +} + +/* JIT compiler uses an all-in-one approach. This improves security, + since the code generator functions are not exported. */ + +#define INCLUDED_FROM_PCRE2_JIT_COMPILE + +#include "pcre2_jit_match.c" +#include "pcre2_jit_misc.c" + +/* End of pcre2_jit_compile.c */ diff --git a/pcre2/src/pcre2_jit_match.c b/pcre2/src/pcre2_jit_match.c new file mode 100644 index 000000000..7e13b8cfe --- /dev/null +++ b/pcre2/src/pcre2_jit_match.c @@ -0,0 +1,186 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE +#error This file must be included from pcre2_jit_compile.c. +#endif + +#ifdef SUPPORT_JIT + +static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) +{ +sljit_u8 local_space[MACHINE_STACK_SIZE]; +struct sljit_stack local_stack; + +local_stack.min_start = local_space; +local_stack.start = local_space; +local_stack.end = local_space + MACHINE_STACK_SIZE; +local_stack.top = local_space + MACHINE_STACK_SIZE; +arguments->stack = &local_stack; +return executable_func(arguments); +} + +#endif + + +/************************************************* +* Do a JIT pattern match * +*************************************************/ + +/* This function runs a JIT pattern match. + +Arguments: + code points to the compiled expression + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + match_data points to a match_data block + mcontext points to a match context + +Returns: > 0 => success; value is the number of ovector pairs filled + = 0 => success, but ovector is not big enough + -1 => failed to match (PCRE_ERROR_NOMATCH) + < -1 => some kind of unexpected problem +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext) +{ +#ifndef SUPPORT_JIT + +(void)code; +(void)subject; +(void)length; +(void)start_offset; +(void)options; +(void)match_data; +(void)mcontext; +return PCRE2_ERROR_JIT_BADOPTION; + +#else /* SUPPORT_JIT */ + +pcre2_real_code *re = (pcre2_real_code *)code; +executable_functions *functions = (executable_functions *)re->executable_jit; +pcre2_jit_stack *jit_stack; +uint32_t oveccount = match_data->oveccount; +uint32_t max_oveccount; +union { + void *executable_func; + jit_function call_executable_func; +} convert_executable_func; +jit_arguments arguments; +int rc; +int index = 0; + +if ((options & PCRE2_PARTIAL_HARD) != 0) + index = 2; +else if ((options & PCRE2_PARTIAL_SOFT) != 0) + index = 1; + +if (functions == NULL || functions->executable_funcs[index] == NULL) + return PCRE2_ERROR_JIT_BADOPTION; + +/* Sanity checks should be handled by pcre_exec. */ +arguments.str = subject + start_offset; +arguments.begin = subject; +arguments.end = subject + length; +arguments.match_data = match_data; +arguments.startchar_ptr = subject; +arguments.mark_ptr = NULL; +arguments.options = options; + +if (mcontext != NULL) + { + arguments.callout = mcontext->callout; + arguments.callout_data = mcontext->callout_data; + arguments.offset_limit = mcontext->offset_limit; + arguments.limit_match = (mcontext->match_limit < re->limit_match)? + mcontext->match_limit : re->limit_match; + if (mcontext->jit_callback != NULL) + jit_stack = mcontext->jit_callback(mcontext->jit_callback_data); + else + jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data; + } +else + { + arguments.callout = NULL; + arguments.callout_data = NULL; + arguments.offset_limit = PCRE2_UNSET; + arguments.limit_match = (MATCH_LIMIT < re->limit_match)? + MATCH_LIMIT : re->limit_match; + jit_stack = NULL; + } + + +max_oveccount = functions->top_bracket; +if (oveccount > max_oveccount) + oveccount = max_oveccount; +arguments.oveccount = oveccount << 1; + + +convert_executable_func.executable_func = functions->executable_funcs[index]; +if (jit_stack != NULL) + { + arguments.stack = (struct sljit_stack *)(jit_stack->stack); + rc = convert_executable_func.call_executable_func(&arguments); + } +else + rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func); + +if (rc > (int)oveccount) + rc = 0; +match_data->code = re; +match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL; +match_data->rc = rc; +match_data->startchar = arguments.startchar_ptr - subject; +match_data->leftchar = 0; +match_data->rightchar = 0; +match_data->mark = arguments.mark_ptr; +match_data->matchedby = PCRE2_MATCHEDBY_JIT; + +return match_data->rc; + +#endif /* SUPPORT_JIT */ +} + +/* End of pcre2_jit_match.c */ diff --git a/pcre2/src/pcre2_jit_misc.c b/pcre2/src/pcre2_jit_misc.c new file mode 100644 index 000000000..36abdbaf9 --- /dev/null +++ b/pcre2/src/pcre2_jit_misc.c @@ -0,0 +1,232 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE +#error This file must be included from pcre2_jit_compile.c. +#endif + + + +/************************************************* +* Free JIT read-only data * +*************************************************/ + +void +PRIV(jit_free_rodata)(void *current, void *allocator_data) +{ +#ifndef SUPPORT_JIT +(void)current; +(void)allocator_data; +#else /* SUPPORT_JIT */ +void *next; + +SLJIT_UNUSED_ARG(allocator_data); + +while (current != NULL) + { + next = *(void**)current; + SLJIT_FREE(current, allocator_data); + current = next; + } + +#endif /* SUPPORT_JIT */ +} + +/************************************************* +* Free JIT compiled code * +*************************************************/ + +void +PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl) +{ +#ifndef SUPPORT_JIT +(void)executable_jit; +(void)memctl; +#else /* SUPPORT_JIT */ + +executable_functions *functions = (executable_functions *)executable_jit; +void *allocator_data = memctl; +int i; + +for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) + { + if (functions->executable_funcs[i] != NULL) + sljit_free_code(functions->executable_funcs[i]); + PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data); + } + +SLJIT_FREE(functions, allocator_data); + +#endif /* SUPPORT_JIT */ +} + + +/************************************************* +* Free unused JIT memory * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) +{ +#ifndef SUPPORT_JIT +(void)gcontext; /* Suppress warning */ +#else /* SUPPORT_JIT */ +SLJIT_UNUSED_ARG(gcontext); +sljit_free_unused_memory_exec(); +#endif /* SUPPORT_JIT */ +} + + + +/************************************************* +* Allocate a JIT stack * +*************************************************/ + +PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION +pcre2_jit_stack_create(size_t startsize, size_t maxsize, + pcre2_general_context *gcontext) +{ +#ifndef SUPPORT_JIT + +(void)gcontext; +(void)startsize; +(void)maxsize; +return NULL; + +#else /* SUPPORT_JIT */ + +pcre2_jit_stack *jit_stack; + +if (startsize < 1 || maxsize < 1) + return NULL; +if (startsize > maxsize) + startsize = maxsize; +startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); +maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); + +jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext); +if (jit_stack == NULL) return NULL; +jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl); +if (jit_stack->stack == NULL) + { + jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data); + return NULL; + } +return jit_stack; + +#endif +} + + +/************************************************* +* Assign a JIT stack to a pattern * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback, + void *callback_data) +{ +#ifndef SUPPORT_JIT +(void)mcontext; +(void)callback; +(void)callback_data; +#else /* SUPPORT_JIT */ + +if (mcontext == NULL) return; +mcontext->jit_callback = callback; +mcontext->jit_callback_data = callback_data; + +#endif /* SUPPORT_JIT */ +} + + +/************************************************* +* Free a JIT stack * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_jit_stack_free(pcre2_jit_stack *jit_stack) +{ +#ifndef SUPPORT_JIT +(void)jit_stack; +#else /* SUPPORT_JIT */ +if (jit_stack != NULL) + { + sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl); + jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data); + } +#endif /* SUPPORT_JIT */ +} + + +/************************************************* +* Get target CPU type * +*************************************************/ + +const char* +PRIV(jit_get_target)(void) +{ +#ifndef SUPPORT_JIT +return "JIT is not supported"; +#else /* SUPPORT_JIT */ +return sljit_get_platform_name(); +#endif /* SUPPORT_JIT */ +} + + +/************************************************* +* Get size of JIT code * +*************************************************/ + +size_t +PRIV(jit_get_size)(void *executable_jit) +{ +#ifndef SUPPORT_JIT +(void)executable_jit; +return 0; +#else /* SUPPORT_JIT */ +sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes; +SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed); +return executable_sizes[0] + executable_sizes[1] + executable_sizes[2]; +#endif +} + +/* End of pcre2_jit_misc.c */