--- pcre2-10.45.orig/.pc/.quilt_patches +++ pcre2-10.45/.pc/.quilt_patches @@ -0,0 +1 @@ +debian/patches --- pcre2-10.45.orig/.pc/.quilt_series +++ pcre2-10.45/.pc/.quilt_series @@ -0,0 +1 @@ +series --- pcre2-10.45.orig/.pc/.version +++ pcre2-10.45/.pc/.version @@ -0,0 +1 @@ +2 --- pcre2-10.45.orig/.pc/CVE-2025-58050.patch/src/pcre2_match.c +++ pcre2-10.45/.pc/CVE-2025-58050.patch/src/pcre2_match.c @@ -0,0 +1,8080 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2015-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre2_internal.h" + +/* These defines enable debugging code */ + +/* #define DEBUG_FRAMES_DISPLAY */ +/* #define DEBUG_SHOW_OPS */ +/* #define DEBUG_SHOW_RMATCH */ + +#ifdef DEBUG_FRAMES_DISPLAY +#include +#endif + +#ifdef DEBUG_SHOW_OPS +static const char *OP_names[] = { OP_NAME_LIST }; +#endif + +/* These defines identify the name of the block containing "static" +information, and fields within it. */ + +#define NLBLOCK mb /* Block containing newline information */ +#define PSSTART start_subject /* Field containing processed string start */ +#define PSEND end_subject /* Field containing processed string end */ + +#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */ + +/* Masks for identifying the public options that are permitted at match time. */ + +#define PUBLIC_MATCH_OPTIONS \ + (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ + PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \ + PCRE2_DISABLE_RECURSELOOP_CHECK) + +#define PUBLIC_JIT_MATCH_OPTIONS \ + (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ + PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\ + PCRE2_COPY_MATCHED_SUBJECT) + +/* Non-error returns from and within the match() function. 
Error returns are +externally defined PCRE2_ERROR_xxx codes, which are all negative. */ + +#define MATCH_MATCH 1 +#define MATCH_NOMATCH 0 + +/* Special internal returns used in the match() function. Make them +sufficiently negative to avoid the external error codes. */ + +#define MATCH_ACCEPT (-999) +#define MATCH_KETRPOS (-998) +/* The next 5 must be kept together and in sequence so that a test that checks +for any one of them can use a range. */ +#define MATCH_COMMIT (-997) +#define MATCH_PRUNE (-996) +#define MATCH_SKIP (-995) +#define MATCH_SKIP_ARG (-994) +#define MATCH_THEN (-993) +#define MATCH_BACKTRACK_MAX MATCH_THEN +#define MATCH_BACKTRACK_MIN MATCH_COMMIT + +/* Group frame type values. Zero means the frame is not a group frame. The +lower 16 bits are used for data (e.g. the capture number). Group frames are +used for most groups so that information about the start is easily available at +the end without having to scan back through intermediate frames (backtrack +points). */ + +#define GF_CAPTURE 0x00010000u +#define GF_NOCAPTURE 0x00020000u +#define GF_CONDASSERT 0x00030000u +#define GF_RECURSE 0x00040000u + +/* Masks for the identity and data parts of the group frame type. */ + +#define GF_IDMASK(a) ((a) & 0xffff0000u) +#define GF_DATAMASK(a) ((a) & 0x0000ffffu) + +/* Repetition types */ + +enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS }; + +/* Min and max values for the common repeats; a maximum of UINT32_MAX => +infinity. */ + +static const uint32_t rep_min[] = { + 0, 0, /* * and *? */ + 1, 1, /* + and +? */ + 0, 0, /* ? and ?? */ + 0, 0, /* dummy placeholders for OP_CR[MIN]RANGE */ + 0, 1, 0 }; /* OP_CRPOS{STAR, PLUS, QUERY} */ + +static const uint32_t rep_max[] = { + UINT32_MAX, UINT32_MAX, /* * and *? */ + UINT32_MAX, UINT32_MAX, /* + and +? */ + 1, 1, /* ? and ??
*/ + 0, 0, /* dummy placeholders for OP_CR[MIN]RANGE */ + UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */ + +/* Repetition types - must include OP_CRPOSRANGE (not needed above) */ + +static const uint32_t rep_typ[] = { + REPTYPE_MAX, REPTYPE_MIN, /* * and *? */ + REPTYPE_MAX, REPTYPE_MIN, /* + and +? */ + REPTYPE_MAX, REPTYPE_MIN, /* ? and ?? */ + REPTYPE_MAX, REPTYPE_MIN, /* OP_CRRANGE and OP_CRMINRANGE */ + REPTYPE_POS, REPTYPE_POS, /* OP_CRPOSSTAR, OP_CRPOSPLUS */ + REPTYPE_POS, REPTYPE_POS }; /* OP_CRPOSQUERY, OP_CRPOSRANGE */ + +/* Numbers for RMATCH calls at backtracking points. When these lists are +changed, the code at RETURN_SWITCH below must be updated in sync. */ + +enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, + RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, + RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, + RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 }; + +#ifdef SUPPORT_WIDE_CHARS +enum { RM100=100, RM101, RM102, RM103 }; +#endif + +#ifdef SUPPORT_UNICODE +enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, + RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, + RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, + RM224 }; +#endif + +/* Define short names for general fields in the current backtrack frame, which +is always pointed to by the F variable. Occasional references to fields in +other frames are written out explicitly. There are also some fields in the +current frame whose names start with "temp" that are used for short-term, +localised backtracking memory. These are #defined with Lxxx names at the point +of use and undefined afterwards.
*/ + +#define Fback_frame F->back_frame +#define Fcapture_last F->capture_last +#define Fcurrent_recurse F->current_recurse +#define Fecode F->ecode +#define Feptr F->eptr +#define Fgroup_frame_type F->group_frame_type +#define Flast_group_offset F->last_group_offset +#define Flength F->length +#define Fmark F->mark +#define Frdepth F->rdepth +#define Fstart_match F->start_match +#define Foffset_top F->offset_top +#define Foccu F->occu +#define Fop F->op +#define Fovector F->ovector +#define Freturn_id F->return_id + + +#ifdef DEBUG_FRAMES_DISPLAY +/************************************************* +* Display current frames and contents * +*************************************************/ + +/* This debugging function displays the current set of frames and their +contents. It is not called automatically from anywhere, the intention being +that calls can be inserted where necessary when debugging frame-related +problems. + +Arguments: + f the file to write to + F the current top frame + P a previous frame of interest + frame_size the frame size + mb points to the match block + match_data points to the match data block + s identification text + +Returns: nothing +*/ + +static void +display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size, + match_block *mb, pcre2_match_data *match_data, const char *s, ...)
+{ +uint32_t i; +heapframe *Q; +va_list ap; +va_start(ap, s); + +fprintf(f, "FRAMES "); +vfprintf(f, s, ap); +va_end(ap); + +if (P != NULL) fprintf(f, " P=%lu", + ((char *)P - (char *)(match_data->heapframes))/frame_size); +fprintf(f, "\n"); + +for (i = 0, Q = match_data->heapframes; + Q <= F; + i++, Q = (heapframe *)((char *)Q + frame_size)) + { + fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d", + i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode), + Q->back_frame, Q->return_id); + + if (Q->last_group_offset == PCRE2_UNSET) + fprintf(f, " lgoffset=unset\n"); + else + fprintf(f, " lgoffset=%lu\n", Q->last_group_offset/frame_size); + } +} + +#endif + + + +/************************************************* +* Process a callout * +*************************************************/ + +/* This function is called for all callouts, whether "standalone" or at the +start of a conditional group. Fecode will be pointing to either OP_CALLOUT or +OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized +with fixed values. + +Arguments: + F points to the current backtracking frame + mb points to the match block + lengthptr where to return the length of the callout item + +Returns: the return from the callout + or 0 if no callout function exists +*/ + +static int +do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr) +{ +int rc; +PCRE2_SIZE save0, save1; +PCRE2_SIZE *callout_ovector; +pcre2_callout_block *cb; + +*lengthptr = (*Fecode == OP_CALLOUT)? + PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE); + +if (mb->callout == NULL) return 0; /* No callout function provided */ + +/* The original matching code (pre 10.30) worked directly with the ovector +passed by the user, and this was passed to callouts. Now that the working +ovector is in the backtracking frame, it no longer needs to reserve space for +the overall match offsets (which would waste space in the frame).
For backward +compatibility, however, we pass capture_top and offset_vector to the callout as +if for the extended ovector, and we ensure that the first two slots are unset +by preserving and restoring their current contents. Picky compilers complain if +references such as Fovector[-2] are used directly, so we set up a separate +pointer. */ + +callout_ovector = (PCRE2_SIZE *)(Fovector) - 2; + +/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields +are set externally. The first 3 never change; the last is updated for each +bumpalong. */ + +cb = mb->cb; +cb->capture_top = (uint32_t)Foffset_top/2 + 1; +cb->capture_last = Fcapture_last; +cb->offset_vector = callout_ovector; +cb->mark = mb->nomatch_mark; +cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject); +cb->pattern_position = GET(Fecode, 1); +cb->next_item_length = GET(Fecode, 1 + LINK_SIZE); + +if (*Fecode == OP_CALLOUT) /* Numerical callout */ + { + cb->callout_number = Fecode[1 + 2*LINK_SIZE]; + cb->callout_string_offset = 0; + cb->callout_string = NULL; + cb->callout_string_length = 0; + } +else /* String callout */ + { + cb->callout_number = 0; + cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE); + cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1; + cb->callout_string_length = + *lengthptr - (1 + 4*LINK_SIZE) - 2; + } + +save0 = callout_ovector[0]; +save1 = callout_ovector[1]; +callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET; +rc = mb->callout(cb, mb->callout_data); +callout_ovector[0] = save0; +callout_ovector[1] = save1; +cb->callout_flags = 0; +return rc; +} + + + +/************************************************* +* Match a back-reference * +*************************************************/ + +/* This function is called only when it is known that the offset lies within +the offsets that have so far been used in the match. Note that in caseless +UTF-8 mode, the number of subject bytes matched may be different to the number +of reference bytes.
(In theory this could also happen in UTF-16 mode, but it +seems unlikely.) + +Arguments: + offset index into the offset vector + caseless TRUE if caseless + caseopts bitmask of REFI_FLAG_XYZ values + F the current backtracking frame pointer + mb points to match block + lengthptr pointer for returning the length matched + +Returns: = 0 successful match; number of code units matched is set + < 0 no match + > 0 partial match +*/ + +static int +match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F, + match_block *mb, PCRE2_SIZE *lengthptr) +{ +PCRE2_SPTR p; +PCRE2_SIZE length; +PCRE2_SPTR eptr; +PCRE2_SPTR eptr_start; + +/* Deal with an unset group. The default is no match, but there is an option to +match an empty string. */ + +if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET) + { + if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) + { + *lengthptr = 0; + return 0; /* Match */ + } + else return -1; /* No match */ + } + +/* Separate the caseless and UTF cases for speed. */ + +eptr = eptr_start = Feptr; +p = mb->start_subject + Fovector[offset]; +length = Fovector[offset+1] - Fovector[offset]; + +if (caseless) + { +#if defined SUPPORT_UNICODE + BOOL utf = (mb->poptions & PCRE2_UTF) != 0; + BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0; + BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0; + + if (utf || (mb->poptions & PCRE2_UCP) != 0) + { + PCRE2_SPTR endptr = p + length; + + /* Match characters up to the end of the reference. NOTE: the number of + code units matched may differ, because in UTF-8 there are some characters + whose upper and lower case codes have different numbers of bytes. For + example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3 + bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a + sequence of two of the latter.
It is important, therefore, to check the + length along the reference, not along the subject (earlier code did this + wrong). UCP without UTF uses Unicode properties but without UTF encoding. */ + + while (p < endptr) + { + uint32_t c, d; + const ucd_record *ur; + if (eptr >= mb->end_subject) return 1; /* Partial match */ + + if (utf) + { + GETCHARINC(c, eptr); + GETCHARINC(d, p); + } + else + { + c = *eptr++; + d = *p++; + } + + if (turkish_casing && UCD_ANY_I(d)) + { + c = UCD_FOLD_I_TURKISH(c); + d = UCD_FOLD_I_TURKISH(d); + if (c != d) return -1; /* No match */ + } + else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case)) + { + const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset; + + /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets + that start with an ASCII character. */ + if (caseless_restrict && *pp < 128) return -1; /* No match */ + + for (;;) + { + if (c < *pp) return -1; /* No match */ + if (c == *pp++) break; + } + } + } + } + else +#endif + + /* Not in UTF or UCP mode */ + { + for (; length > 0; length--) + { + uint32_t cc, cp; + if (eptr >= mb->end_subject) return 1; /* Partial match */ + cc = UCHAR21TEST(eptr); + cp = UCHAR21TEST(p); + if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc)) + return -1; /* No match */ + p++; + eptr++; + } + } + } + +/* In the caseful case, we can just compare the code units, whether or not we +are in UTF and/or UCP mode. When partial matching, we have to do this unit by +unit.
*/ + +else + { + if (mb->partial != 0) + { + for (; length > 0; length--) + { + if (eptr >= mb->end_subject) return 1; /* Partial match */ + if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /* No match */ + } + } + + /* Not partial matching */ + + else + { + if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */ + if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */ + eptr += length; + } + } + +*lengthptr = eptr - eptr_start; +return 0; /* Match */ +} + + + +/****************************************************************************** +******************************************************************************* + "Recursion" in the match() function + +The original match() function was highly recursive, but this proved to be the +source of a number of problems over the years, mostly because of the relatively +small system stacks that are commonly found. As new features were added to +patterns, various kludges were invented to reduce the amount of stack used, +making the code hard to understand in places. + +A version did exist that used individual frames on the heap instead of calling +match() recursively, but this ran substantially slower. The current version is +a refactoring that uses a vector of frames to remember backtracking points. +This runs no slower, and possibly even a bit faster than the original recursive +implementation. + +At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50 +frames) was allocated on the system stack. If this was not big enough, the heap +was used for a larger vector. However, it turns out that there are environments +where taking as little as 20KiB from the system stack is an embarrassment.
+After another refactoring, the heap is used exclusively, but a pointer to the +frames vector and its size are cached in the match_data block, so that there is +no new memory allocation if the same match_data block is used for multiple +matches (unless the frames vector has to be extended). +******************************************************************************* +******************************************************************************/ + + + + +/************************************************* +* Macros for the match() function * +*************************************************/ + +/* These macros pack up tests that are used for partial matching several times +in the code. The second one is used when we already know we are past the end of +the subject. We set the "hit end" flag if the pointer is at the end of the +subject and either (a) the pointer is past the earliest inspected character +(i.e. something has been matched, even if not part of the actual matched +string), or (b) the pattern contains a lookbehind. These are the conditions for +which adding more characters may allow the current match to continue. + +For hard partial matching, we immediately return a partial match. Otherwise, +carrying on means that a complete match on the current subject will be sought. +A partial match is returned only if no complete match can be found. */ + +#define CHECK_PARTIAL() \ + do { \ + if (Feptr >= mb->end_subject) \ + { \ + SCHECK_PARTIAL(); \ + } \ + } \ + while (0) + +#define SCHECK_PARTIAL() \ + do { \ + if (mb->partial != 0 && \ + (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ + { \ + mb->hitend = TRUE; \ + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ + } \ + } \ + while (0) + + +/* These macros are used to implement backtracking. They simulate a recursive +call to the match() function by means of a local vector of frames which +remember the backtracking points.
*/ + +#define RMATCH(ra,rb) \ + do { \ + start_ecode = ra; \ + Freturn_id = rb; \ + goto MATCH_RECURSE; \ + L_##rb:; \ + } \ + while (0) + +#define RRETURN(ra) \ + do { \ + rrc = ra; \ + goto RETURN_SWITCH; \ + } \ + while (0) + + + +/************************************************* +* Match from current position * +*************************************************/ + +/* This function is called to run one match attempt at a single starting point +in the subject. + +Performance note: It might be tempting to extract commonly used fields from the +mb structure (e.g. end_subject) into individual variables to improve +performance. Tests using gcc on a SPARC disproved this; in the first case, it +made performance worse. + +Arguments: + start_eptr starting character in subject + start_ecode starting position in compiled code + top_bracket number of capturing parentheses in the pattern + frame_size size of each backtracking frame + match_data pointer to the match_data block + mb pointer to "static" variables block + +Returns: MATCH_MATCH if matched ) these values are >= 0 + MATCH_NOMATCH if failed to match ) + negative MATCH_xxx value for PRUNE, SKIP, etc + negative PCRE2_ERROR_xxx value if aborted by an error condition + (e.g. stopped by repeated call or depth limit) +*/ + +static int +match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, + PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb) +{ +/* Frame-handling variables */ + +heapframe *F; /* Current frame pointer */ +heapframe *N = NULL; /* Temporary frame pointers */ +heapframe *P = NULL; + +heapframe *frames_top; /* End of frames vector */ +heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */ +PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ + +/* Local variables that do not need to be preserved over calls to RRMATCH(). 
*/ + +PCRE2_SPTR branch_end = NULL; +PCRE2_SPTR branch_start; +PCRE2_SPTR bracode; /* Temp pointer to start of group */ +PCRE2_SIZE offset; /* Used for group offsets */ +PCRE2_SIZE length; /* Used for various length calculations */ + +int rrc; /* Return from functions & backtracking "recursions" */ +#ifdef SUPPORT_UNICODE +int proptype; /* Type of character property */ +#endif + +uint32_t i; /* Used for local loops */ +uint32_t fc; /* Character values */ +uint32_t number; /* Used for group and other numbers */ +uint32_t reptype = 0; /* Type of repetition (0 to avoid compiler warning) */ +uint32_t group_frame_type; /* Specifies type for new group frames */ + +BOOL condition; /* Used in conditional groups */ +BOOL cur_is_word; /* Used in "word" tests */ +BOOL prev_is_word; /* Used in "word" tests */ + +/* UTF and UCP flags */ + +#ifdef SUPPORT_UNICODE +BOOL utf = (mb->poptions & PCRE2_UTF) != 0; +BOOL ucp = (mb->poptions & PCRE2_UCP) != 0; +#else +BOOL utf = FALSE; /* Required for convenience even when no Unicode support */ +#endif + +/* This is the length of the last part of a backtracking frame that must be +copied when a new frame is created. */ + +frame_copy_size = frame_size - offsetof(heapframe, eptr); + +/* Set up the first frame and the end of the frames vector. */ + +F = match_data->heapframes; +frames_top = (heapframe *)((char *)F + match_data->heapframes_size); + +Frdepth = 0; /* "Recursion" depth */ +Fcapture_last = 0; /* Number of most recent capture */ +Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. 
*/ +Fstart_match = Feptr = start_eptr; /* Current data pointer and start match */ +Fmark = NULL; /* Most recent mark */ +Foffset_top = 0; /* End of captures within the frame */ +Flast_group_offset = PCRE2_UNSET; /* Saved frame of most recent group */ +group_frame_type = 0; /* Not a start of group frame */ +goto NEW_FRAME; /* Start processing with this frame */ + +/* Come back here when we want to create a new frame for remembering a +backtracking point. */ + +MATCH_RECURSE: + +/* Set up a new backtracking frame. If the vector is full, get a new one, +doubling the size, but constrained by the heap limit (which is in KiB). */ + +N = (heapframe *)((char *)F + frame_size); +if ((heapframe *)((char *)N + frame_size) >= frames_top) + { + heapframe *new; + PCRE2_SIZE newsize; + PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes); + + if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2) + { + if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1) + return PCRE2_ERROR_NOMEMORY; + newsize = PCRE2_SIZE_MAX - 1; + } + else + newsize = match_data->heapframes_size * 2; + + if (newsize / 1024 >= mb->heap_limit) + { + PCRE2_SIZE old_size = match_data->heapframes_size / 1024; + if (mb->heap_limit <= old_size) + return PCRE2_ERROR_HEAPLIMIT; + else + { + PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size); + int over_bytes = match_data->heapframes_size % 1024; + if (over_bytes) max_delta -= (1024 - over_bytes); + newsize = match_data->heapframes_size + max_delta; + } + } + + /* With a heap limit set, the permitted additional size may not be enough for + another frame, so do a final check. 
*/ + + if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT; + new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data); + if (new == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy(new, match_data->heapframes, usedsize); + + N = (heapframe *)((char *)new + usedsize); + F = (heapframe *)((char *)N - frame_size); + + match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data); + match_data->heapframes = new; + match_data->heapframes_size = newsize; + frames_top = (heapframe *)((char *)new + newsize); + } + +#ifdef DEBUG_SHOW_RMATCH +fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1); +if (group_frame_type != 0) + { + fprintf(stderr, " type=%x ", group_frame_type); + switch (GF_IDMASK(group_frame_type)) + { + case GF_CAPTURE: + fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_NOCAPTURE: + fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_CONDASSERT: + fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type)); + break; + + case GF_RECURSE: + fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type)); + break; + + default: + fprintf(stderr, "*** unknown ***"); + break; + } + } +fprintf(stderr, "\n"); +#endif + +/* Copy those fields that must be copied into the new frame, increase the +"recursion" depth (i.e. the new frame's index) and then make the new frame +current. */ + +memcpy((char *)N + offsetof(heapframe, eptr), + (char *)F + offsetof(heapframe, eptr), + frame_copy_size); + +N->rdepth = Frdepth + 1; +F = N; + +/* Carry on processing with a new frame. */ + +NEW_FRAME: +Fgroup_frame_type = group_frame_type; +Fecode = start_ecode; /* Starting code pointer */ +Fback_frame = frame_size; /* Default is go back one frame */ + +/* If this is a special type of group frame, remember its offset for quick +access at the end of the group. If this is a recursion, set a new current +recursion value. 
*/ + +if (group_frame_type != 0) + { + Flast_group_offset = (char *)F - (char *)match_data->heapframes; + if (GF_IDMASK(group_frame_type) == GF_RECURSE) + Fcurrent_recurse = GF_DATAMASK(group_frame_type); + group_frame_type = 0; + } + + +/* ========================================================================= */ +/* This is the main processing loop. First check that we haven't recorded too +many backtracks (search tree is too large), or that we haven't exceeded the +recursive depth limit (used too many backtracking frames). If not, process the +opcodes. */ + +if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT; +if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; + +#ifdef DEBUG_SHOW_OPS +fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n", + GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject); +#endif + +for (;;) + { +#ifdef DEBUG_SHOW_OPS +fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + OP_names[*Fecode]); +#endif + + Fop = (uint8_t)(*Fecode); /* Cast needed for 16-bit and 32-bit modes */ + switch(Fop) + { + /* ===================================================================== */ + /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close + any currently open capturing brackets. Unlike reaching the end of a group, + where we know the starting frame is at the top of the chained frames, in + this case we have to search back for the relevant frame in case other types + of group that use chained frames have intervened. Multiple OP_CLOSEs always + come innermost first, which matches the chain order. We can ignore this in + a recursion, because captures are not passed out of recursions. */ + + case OP_CLOSE: + if (Fcurrent_recurse == RECURSE_UNSET) + { + number = GET2(Fecode, 1); + offset = Flast_group_offset; + for(;;) + { + /* Corrupted heapframes?. 
Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (N->group_frame_type == (GF_CAPTURE | number)) break; + offset = P->last_group_offset; + } + offset = (number << 1) - 2; + Fcapture_last = number; + Fovector[offset] = P->eptr - mb->start_subject; + Fovector[offset+1] = Feptr - mb->start_subject; + if (offset >= Foffset_top) Foffset_top = offset + 2; + } + Fecode += PRIV(OP_lengths)[*Fecode]; + break; + + + /* ===================================================================== */ + /* Real or forced end of the pattern, assertion, or recursion. In an + assertion ACCEPT, update the last used pointer and remember the current + frame so that the captures and mark can be fished out of it. */ + + case OP_ASSERT_ACCEPT: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + assert_accept_frame = F; + RRETURN(MATCH_ACCEPT); + + /* For ACCEPT within a recursion, we have to find the most recent + recursion. If not in a recursion, fall through to code that is common with + OP_END. */ + + case OP_ACCEPT: + if (Fcurrent_recurse != RECURSE_UNSET) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Accept within recursion\n"); +#endif + offset = Flast_group_offset; + for(;;) + { + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; + offset = P->last_group_offset; + } + + /* N is now the frame of the recursion; the previous frame is at the + OP_RECURSE position. 
Go back there, copying the current subject position + and mark, and the start_match position (\K might have changed it), and + then move on past the OP_RECURSE. */ + + P->eptr = Feptr; + P->mark = Fmark; + P->start_match = Fstart_match; + F = P; + Fecode += 1 + LINK_SIZE; + continue; + } + /* Fall through */ + + /* OP_END itself can never be reached within a recursion because that is + picked up when the OP_KET that always precedes OP_END is reached. */ + + case OP_END: + + /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if + PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the + subject. In both cases, backtracking will then try other alternatives, if + any. */ + + if (Feptr == Fstart_match && + ((mb->moptions & PCRE2_NOTEMPTY) != 0 || + ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 && + Fstart_match == mb->start_subject + mb->start_offset))) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because empty string\n"); +#endif + RRETURN(MATCH_NOMATCH); + } + + /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not + the end of the subject. After (*ACCEPT) we fail the entire match (at this + position) but backtrack if we've reached the end of the pattern. This + applies whether or not we are in a recursion. */ + + if (Feptr < mb->end_subject && + ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0) + { + if (Fop == OP_END) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n"); +#endif + RRETURN(MATCH_NOMATCH); + } + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n"); +#endif + return MATCH_NOMATCH; /* (*ACCEPT) */ + } + + /* We have a successful match of the whole pattern. Record the result and + then do a direct return from the function. 
If there is space in the offset + vector, set any pairs that follow the highest-numbered captured string but + are less than the number of capturing groups in the pattern to PCRE2_UNSET. + It is documented that this happens. "Gaps" are set to PCRE2_UNSET + dynamically. It is only those at the end that need setting here. */ + + mb->end_match_ptr = Feptr; /* Record where we ended */ + mb->end_offset_top = Foffset_top; /* and how many extracts were taken */ + mb->mark = Fmark; /* and the last success mark */ + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + + match_data->ovector[0] = Fstart_match - mb->start_subject; + match_data->ovector[1] = Feptr - mb->start_subject; + + /* Set i to the smaller of the sizes of the external and frame ovectors. */ + + i = 2 * ((top_bracket + 1 > match_data->oveccount)? + match_data->oveccount : top_bracket + 1); + memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); + while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET; + return MATCH_MATCH; /* Note: NOT RRETURN */ + + + /*===================================================================== */ + /* Match any single character type except newline; have to take care with + CRLF newlines and partial matching. */ + + case OP_ANY: + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr == mb->end_subject - 1 && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + /* Fall through */ + + /* Match any single character whatsoever. */ + + case OP_ALLANY: + if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */ + { /* not be updated before SCHECK_PARTIAL. 
*/ + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; +#ifdef SUPPORT_UNICODE + if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); +#endif + Fecode++; + break; + + + /* ===================================================================== */ + /* Match a single code unit, even in UTF mode. This opcode really does + match any code unit, even newline. (It really should be called ANYCODEUNIT, + of course - the byte name is from pre-16 bit days.) */ + + case OP_ANYBYTE: + if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */ + { /* not be updated before SCHECK_PARTIAL. */ + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; + Fecode++; + break; + + + /* ===================================================================== */ + /* Match a single character, casefully */ + + case OP_CHAR: +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Fecode++; + GETCHARLEN(fc, Fecode, Flength); + if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr)) + { + CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } + for (; Flength > 0; Flength--) + { + if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + + /* Not UTF mode */ + { + if (mb->end_subject - Feptr < 1) + { + SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ + RRETURN(MATCH_NOMATCH); + } + if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH); + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match a single character, caselessly. If we are at the end of the + subject, give up immediately. We get here only when the pattern character + has at most one other case. Characters with more than two cases are coded + as OP_PROP with the pseudo-property PT_CLIST. 
*/ + + case OP_CHARI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Fecode++; + GETCHARLEN(fc, Fecode, Flength); + + /* If the pattern character's value is < 128, we know that its other case + (if any) is also < 128 (and therefore only one code unit long in all + code-unit widths), so we can use the fast lookup table. We checked above + that there is at least one character left in the subject. */ + + if (fc < 128) + { + uint32_t cc = UCHAR21(Feptr); + if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH); + Fecode++; + Feptr++; + } + + /* Otherwise we must pick up the subject character and use Unicode + property support to test its other case. Note that we cannot use the + value of "Flength" to check for sufficient bytes left, because the other + case of the character may have more or fewer code units. */ + + else + { + uint32_t dc; + GETCHARINC(dc, Feptr); + Fecode += Flength; + if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH); + } + } + + /* If UCP is set without UTF we must do the same as above, but with one + character per code unit. */ + + else if (ucp) + { + uint32_t cc = UCHAR21(Feptr); + fc = Fecode[1]; + if (fc < 128) + { + if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH); + } + else + { + if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH); + } + Feptr++; + Fecode += 2; + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF or UCP mode; use the table for characters < 256. */ + { + if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1]) + != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH); + Feptr++; + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match not a single character. 
*/ + + case OP_NOT: + case OP_NOTI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t ch; + Fecode++; + GETCHARINC(ch, Fecode); + GETCHARINC(fc, Feptr); + if (ch == fc) + { + RRETURN(MATCH_NOMATCH); /* Caseful match */ + } + else if (Fop == OP_NOTI) /* If caseless */ + { + if (ch > 127) + ch = UCD_OTHERCASE(ch); + else + ch = (mb->fcc)[ch]; + if (ch == fc) RRETURN(MATCH_NOMATCH); + } + } + + /* UCP without UTF is as above, but with one character per code unit. */ + + else if (ucp) + { + uint32_t ch; + fc = UCHAR21INC(Feptr); + ch = Fecode[1]; + Fecode += 2; + + if (ch == fc) + { + RRETURN(MATCH_NOMATCH); /* Caseful match */ + } + else if (Fop == OP_NOTI) /* If caseless */ + { + if (ch > 127) + ch = UCD_OTHERCASE(ch); + else + ch = (mb->fcc)[ch]; + if (ch == fc) RRETURN(MATCH_NOMATCH); + } + } + + else +#endif /* SUPPORT_UNICODE */ + + /* Neither UTF nor UCP is set */ + + { + uint32_t ch = Fecode[1]; + fc = UCHAR21INC(Feptr); + if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc)) + RRETURN(MATCH_NOMATCH); + Fecode += 2; + } + break; + + + /* ===================================================================== */ + /* Match a single character repeatedly. 
*/ + +#define Loclength F->temp_size +#define Lstart_eptr F->temp_sptr[0] +#define Lcharptr F->temp_sptr[1] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lc F->temp_32[2] +#define Loc F->temp_32[3] + + case OP_EXACT: + case OP_EXACTI: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_POSUPTO: + case OP_POSUPTOI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_UPTO: + case OP_UPTOI: + reptype = REPTYPE_MAX; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_MINUPTO: + case OP_MINUPTOI: + reptype = REPTYPE_MIN; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATCHAR; + + case OP_POSSTAR: + case OP_POSSTARI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATCHAR; + + case OP_POSPLUS: + case OP_POSPLUSI: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATCHAR; + + case OP_POSQUERY: + case OP_POSQUERYI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATCHAR; + + case OP_STAR: + case OP_STARI: + case OP_MINSTAR: + case OP_MINSTARI: + case OP_PLUS: + case OP_PLUSI: + case OP_MINPLUS: + case OP_MINPLUSI: + case OP_QUERY: + case OP_QUERYI: + case OP_MINQUERY: + case OP_MINQUERYI: + fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI); + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated single-character matches. We first check + for the minimum number of characters. If the minimum equals the maximum, we + are done. Otherwise, if minimizing, check the rest of the pattern for a + match; if there isn't one, advance up to the maximum, one character at a + time. + + If maximizing, advance up to the maximum number of matching characters, + until Feptr is past the end of the maximum run. If possessive, we are + then done (no backing up). 
Otherwise, match at this position; anything + other than no match is immediately returned. For nomatch, back up one + character, unless we are matching \R and the last thing matched was + \r\n, in which case, back up two code units until we reach the first + optional character position. + + The various UTF/non-UTF and caseful/caseless cases are handled separately, + for speed. */ + + REPEATCHAR: +#ifdef SUPPORT_UNICODE + if (utf) + { + Flength = 1; + Lcharptr = Fecode; + GETCHARLEN(fc, Fecode, Flength); + Fecode += Flength; + + /* Handle multi-code-unit character matching, caseful and caseless. */ + + if (Flength > 1) + { + uint32_t othercase; + + if (Fop >= OP_STARI && /* Caseless */ + (othercase = UCD_OTHERCASE(fc)) != fc) + Loclength = PRIV(ord2utf)(othercase, Foccu); + else Loclength = 0; + + for (i = 1; i <= Lmin; i++) + { + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM202); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + if (Feptr <= mb->end_subject - Flength && + memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) + Feptr += Flength; + else if (Loclength > 0 && + Feptr <= mb->end_subject - 
Loclength && + memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0) + Feptr += Loclength; + else + { + CHECK_PARTIAL(); + break; + } + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM203); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + break; /* End of repeated wide character handling */ + } + + /* Length of UTF character is 1. Put it into the preserved variable and + fall through to the non-UTF code. */ + + Lc = fc; + } + else +#endif /* SUPPORT_UNICODE */ + + /* When not in UTF mode, load a single-code-unit character. Then proceed as + above, using Unicode casing if either UTF or UCP is set. */ + + Lc = *Fecode++; + + /* Caseless comparison */ + + if (Fop >= OP_STARI) + { +#if PCRE2_CODE_UNIT_WIDTH == 8 +#ifdef SUPPORT_UNICODE + if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + /* Lc will be < 128 in UTF-8 mode. 
*/ + Loc = mb->fcc[Lc]; +#else /* 16-bit & 32-bit */ +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + Loc = TABLE_GET(Lc, mb->fcc, Lc); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); + Feptr++; + } + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + RMATCH(Fecode, RM25); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH); + Feptr++; + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + uint32_t cc; /* Faster than PCRE2_UCHAR */ + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + cc = UCHAR21TEST(Feptr); + if (Lc != cc && Loc != cc) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM26); + Feptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + } + + /* Caseful comparisons (includes all multi-byte characters) */ + + else + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM27); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc != 
UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + else /* Maximize */ + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + + if (Lc != UCHAR21TEST(Feptr)) break; + Feptr++; + } + + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM28); + Feptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + } + break; + +#undef Loclength +#undef Lstart_eptr +#undef Lcharptr +#undef Lmin +#undef Lmax +#undef Lc +#undef Loc + + + /* ===================================================================== */ + /* Match a negated single one-byte character repeatedly. This is almost a + repeat of the code for a repeated single character, but I haven't found a + nice way of commoning these up that doesn't require a test of the + positive/negative option for each character match. Maybe that wouldn't add + very much to the time taken, but character matching *is* what this is all + about... 
*/ + +#define Lstart_eptr F->temp_sptr[0] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lc F->temp_32[2] +#define Loc F->temp_32[3] + + case OP_NOTEXACT: + case OP_NOTEXACTI: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTUPTO: + case OP_NOTUPTOI: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = REPTYPE_MAX; + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = REPTYPE_MIN; + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATNOTCHAR; + + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATNOTCHAR; + + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_NOTPLUS: + case OP_NOTPLUSI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR); + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated single-character non-matches. */ + + REPEATNOTCHAR: + GETCHARINCTEST(Lc, Fecode); + + /* The code is duplicated for the caseless and caseful cases, for speed, + since matching characters is likely to be quite common. First, ensure the + minimum number of matches are present. If Lmin = Lmax, we are done. 
+ Otherwise, if minimizing, keep trying the rest of the expression and + advancing one matching character if failing, up to the maximum. + Alternatively, if maximizing, find the maximum number of characters and + work backwards. */ + + if (Fop >= OP_NOTSTARI) /* Caseless */ + { +#ifdef SUPPORT_UNICODE + if ((utf || ucp) && Lc > 127) + Loc = UCD_OTHERCASE(Lc); + else +#endif /* SUPPORT_UNICODE */ + + Loc = TABLE_GET(Lc, mb->fcc, Lc); /* Other case from table */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH); + Feptr++; + } + } + + if (Lmin == Lmax) continue; /* Finished for exact count */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (;;) + { + RMATCH(Fecode, RM204); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif /*SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM29); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH); + Feptr++; + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* Maximize case */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = Lmin; i < Lmax; i++) 
+ { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(d, Feptr, len); + if (Lc == d || Loc == d) break; + Feptr += len; + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM205); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (Lc == *Feptr || Loc == *Feptr) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM30); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + } + } + + /* Caseful comparisons */ + + else + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); + } + } + + if (Lmin == Lmax) continue; + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (;;) + { + RMATCH(Fecode, RM206); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(d, Feptr); + if (Lc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM31); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= 
mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH); + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* Maximize case */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + uint32_t d; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(d, Feptr, len); + if (Lc == d) break; + Feptr += len; + } + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + if (reptype != REPTYPE_POS) for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM207); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (Lc == *Feptr) break; + Feptr++; + } + if (reptype != REPTYPE_POS) for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM32); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + } + } + break; + +#undef Lstart_eptr +#undef Lmin +#undef Lmax +#undef Lc +#undef Loc + + + /* ===================================================================== */ + /* Match a bit-mapped character class, possibly repeatedly. These opcodes + are used when all the characters in the class have values in the range + 0-255, and either the matching is caseful, or the characters are in the + range 0-127 when UTF processing is enabled. The only difference between + OP_CLASS and OP_NCLASS occurs when a data character outside the range is + encountered. 
*/ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lstart_eptr F->temp_sptr[0] +#define Lbyte_map_address F->temp_sptr[1] +#define Lbyte_map ((const unsigned char *)Lbyte_map_address) + + case OP_NCLASS: + case OP_CLASS: + { + Lbyte_map_address = Fecode + 1; /* Save for matching */ + Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */ + + /* Look past the end of the item to see if there is repeat information + following. Then obey similar code to character type repeats. */ + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + fc = *Feptr++; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + + /* If Lmax == Lmin we are done. 
Continue with main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + for (;;) + { + RMATCH(Fecode, RM200); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM23); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + fc = *Feptr++; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc > 255) + { + if (Fop == OP_CLASS) break; + } + else + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. 
*/ + + for (;;) + { + RMATCH(Fecode, RM201); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ + BACKCHAR(Feptr); + } + } + else +#endif + /* Not UTF mode */ + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + fc = *Feptr; +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (fc > 255) + { + if (Fop == OP_CLASS) break; + } + else +#endif + if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break; + Feptr++; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + while (Feptr >= Lstart_eptr) + { + RMATCH(Fecode, RM24); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + } + } + + RRETURN(MATCH_NOMATCH); + } + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + +#undef Lbyte_map_address +#undef Lbyte_map +#undef Lstart_eptr +#undef Lmin +#undef Lmax + + + /* ===================================================================== */ + /* Match an extended character class. In the 8-bit library, this opcode is + encountered only when UTF-8 mode is supported. In the 16-bit and + 32-bit libraries, codepoints greater than 255 may be encountered even when + UTF is not supported. 
*/ + +#define Lstart_eptr F->temp_sptr[0] +#define Lxclass_data F->temp_sptr[1] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] + +#ifdef SUPPORT_WIDE_CHARS + case OP_XCLASS: + { + Lxclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */ + Fecode += GET(Fecode, 1); /* Advance past the item */ + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + + /* If Lmax == Lmin we can just continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. 
*/ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM100); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(fc, Feptr, len); +#else + fc = *Feptr; +#endif + if (!PRIV(xclass)(fc, Lxclass_data, + (const uint8_t*)mb->start_code, utf)) break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + for(;;) + { + RMATCH(Fecode, RM101); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ +#ifdef SUPPORT_UNICODE + if (utf) BACKCHAR(Feptr); +#endif + } + RRETURN(MATCH_NOMATCH); + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } +#endif /* SUPPORT_WIDE_CHARS: end of XCLASS */ + +#undef Lstart_eptr +#undef Lxclass_data +#undef Lmin +#undef Lmax + + + /* ===================================================================== */ + /* Match a complex, set-based character class. These opcodes are used when + there is complex nesting or logical operations within the character + class. 
*/ + +#define Lstart_eptr F->temp_sptr[0] +#define Leclass_data F->temp_sptr[1] +#define Leclass_len F->temp_size +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] + +#ifdef SUPPORT_WIDE_CHARS + case OP_ECLASS: + { + Leclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */ + Fecode += GET(Fecode, 1); /* Advance past the item */ + Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data); + + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + reptype = rep_typ[*Fecode - OP_CRSTAR]; + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + Lmin = Lmax = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + + /* If Lmax == Lmin we can just continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. 
*/ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + RMATCH(Fecode, RM102); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + Lstart_eptr = Feptr; + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } +#ifdef SUPPORT_UNICODE + GETCHARLENTEST(fc, Feptr, len); +#else + fc = *Feptr; +#endif + if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len, + (const uint8_t*)mb->start_code, utf)) + break; + Feptr += len; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + for(;;) + { + RMATCH(Fecode, RM103); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ +#ifdef SUPPORT_UNICODE + if (utf) BACKCHAR(Feptr); +#endif + } + RRETURN(MATCH_NOMATCH); + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } +#endif /* SUPPORT_WIDE_CHARS: end of ECLASS */ + +#undef Lstart_eptr +#undef Leclass_data +#undef Leclass_len +#undef Lmin +#undef Lmax + + + /* ===================================================================== */ + /* Match various character types when PCRE2_UCP is not set. These opcodes + are not generated when PCRE2_UCP is set - instead appropriate property + tests are compiled. 
*/ + + case OP_NOT_DIGIT: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_DIGIT: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_NOT_WHITESPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_WHITESPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_NOT_WORDCHAR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_WORDCHAR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_ANYNL: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + } + else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) 
RRETURN(MATCH_NOMATCH); + break; + } + Fecode++; + break; + + case OP_NOT_HSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */ + default: break; + } + Fecode++; + break; + + case OP_HSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: break; /* Byte and multibyte cases */ + default: RRETURN(MATCH_NOMATCH); + } + Fecode++; + break; + + case OP_NOT_VSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + Fecode++; + break; + + case OP_VSPACE: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + Fecode++; + break; + + +#ifdef SUPPORT_UNICODE + + /* ===================================================================== */ + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. 
*/ + + case OP_PROP: + case OP_NOTPROP: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + { + const uint32_t *cp; + uint32_t chartype; + const ucd_record *prop = GET_UCD(fc); + BOOL notmatch = Fop == OP_NOTPROP; + + switch(Fecode[1]) + { + case PT_LAMP: + chartype = prop->chartype; + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_GC: + if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_PC: + if ((Fecode[2] == prop->chartype) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_SC: + if ((Fecode[2] == prop->script) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_SCX: + { + BOOL ok = (Fecode[2] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + + /* These are specials */ + + case PT_ALNUM: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + } + break; + + case PT_WORD: + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) break;; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Fecode[2]; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } } + if (fc == *cp++) + { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; } + } + break; + + case PT_UCNC: + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BOOL: + { + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Fecode[2]) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + + /* This should never occur */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + + Fecode += 3; + } + break; + + + /* ===================================================================== */ + /* Match an extended Unicode sequence. We will get here only if the support + is in the binary; otherwise a compile-time error occurs. 
*/ + + case OP_EXTUNI: + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf, + NULL); + } + CHECK_PARTIAL(); + Fecode++; + break; + +#endif /* SUPPORT_UNICODE */ + + + /* ===================================================================== */ + /* Match a single character type repeatedly. Note that the property type + does not need to be in a stack frame as it is not used within an RMATCH() + loop. */ + +#define Lstart_eptr F->temp_sptr[0] +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lctype F->temp_32[2] +#define Lpropvalue F->temp_32[3] + + case OP_TYPEEXACT: + Lmin = Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + Lmin = 0; + Lmax = GET2(Fecode, 1); + reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX; + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPEPOSSTAR: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSPLUS: + reptype = REPTYPE_POS; + Lmin = 1; + Lmax = UINT32_MAX; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSQUERY: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = 1; + Fecode++; + goto REPEATTYPE; + + case OP_TYPEPOSUPTO: + reptype = REPTYPE_POS; + Lmin = 0; + Lmax = GET2(Fecode, 1); + Fecode += 1 + IMM2_SIZE; + goto REPEATTYPE; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + fc = *Fecode++ - OP_TYPESTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + + /* Common code for all repeated character type matches. 
*/ + + REPEATTYPE: + Lctype = *Fecode++; /* Code for the character type */ + +#ifdef SUPPORT_UNICODE + if (Lctype == OP_PROP || Lctype == OP_NOTPROP) + { + proptype = *Fecode++; + Lpropvalue = *Fecode++; + } + else proptype = -1; +#endif + + /* First, ensure the minimum number of matches are present. Use inline + code for maximizing the speed, and do the type test once at the start + (i.e. keep it out of the loops). As there are no calls to RMATCH in the + loops, we can use an ordinary variable for "notmatch". The code for UTF + mode is separated out for tidiness, except for Unicode property tests. */ + + if (Lmin > 0) + { +#ifdef SUPPORT_UNICODE + if (proptype >= 0) /* Property tests in all modes */ + { + BOOL notmatch = Lctype == OP_NOTPROP; + switch(proptype) + { + case PT_LAMP: + for (i = 1; i <= Lmin; i++) + { + int chartype; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_GC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_PC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SCX: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= 
mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_ALNUM: + for (i = 1; i <= Lmin; i++) + { + int category; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. */ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case PT_WORD: + for (i = 1; i <= Lmin; i++) + { + int chartype, category; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_CLIST: + for (i = 1; i <= Lmin; i++) + { + const uint32_t *cp; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) continue; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + 
for (;;) + { + if (fc < *cp) + { + if (notmatch) break; + RRETURN(MATCH_NOMATCH); + } + if (fc == *cp++) + { + if (notmatch) RRETURN(MATCH_NOMATCH); + break; + } + } + } + break; + + case PT_UCNC: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BIDICL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BOOL: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + /* This should not occur */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. 
*/ + + else if (Lctype == OP_EXTUNI) + { + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, + mb->end_subject, utf, NULL); + } + CHECK_PARTIAL(); + } + } + else +#endif /* SUPPORT_UNICODE */ + +/* Handle all other cases in UTF mode */ + +#ifdef SUPPORT_UNICODE + if (utf) switch(Lctype) + { + case OP_ANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ALLANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ANYBYTE: + if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH); + Feptr += Lmin; + break; + + case OP_ANYNL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + 
GETCHARINC(fc, Feptr); + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + HSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINC(fc, Feptr); + if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_DIGIT: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. 
*/ + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. */ + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + uint32_t cc; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + cc = UCHAR21(Feptr); + if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + /* No need to skip more code units - we know it has only one. */ + } + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } /* End switch(Lctype) */ + + else +#endif /* SUPPORT_UNICODE */ + + /* Code for the non-UTF case for minimum matching of operators other + than OP_PROP and OP_NOTPROP. 
*/ + + switch(Lctype) + { + case OP_ANY: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *Feptr == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + } + break; + + case OP_ALLANY: + if (Feptr > mb->end_subject - Lmin) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += Lmin; + break; + + /* This OP_ANYBYTE case will never be reached because \C gets turned + into OP_ALLANY in non-UTF mode. Cut out the code so that coverage + reports don't complain about it's never being used. */ + +/* case OP_ANYBYTE: +* if (Feptr > mb->end_subject - Lmin) +* { +* SCHECK_PARTIAL(); +* RRETURN(MATCH_NOMATCH); +* } +* Feptr += Lmin; +* break; +*/ + case OP_ANYNL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#if PCRE2_CODE_UNIT_WIDTH != 8 + case 0x2028: + case 0x2029: +#endif + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH); + break; + } + } + break; + + case OP_NOT_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + } + } + break; + + case OP_HSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + 
HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + break; + } + } + break; + + case OP_NOT_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + default: break; + } + } + break; + + case OP_VSPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + switch(*Feptr++) + { + default: RRETURN(MATCH_NOMATCH); + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + break; + } + } + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_DIGIT: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0) + 
RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + Feptr++; + } + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + } + + /* If Lmin = Lmax we are done. Continue with the main loop. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, we have to test the rest of the pattern before each + subsequent match. This means we cannot use a local "notmatch" variable as + in the other cases. As all 4 temporary 32-bit values in the frame are + already in use, just test the type each time. */ + + if (reptype == REPTYPE_MIN) + { +#ifdef SUPPORT_UNICODE + if (proptype >= 0) + { + switch(proptype) + { + case PT_LAMP: + for (;;) + { + int chartype; + RMATCH(Fecode, RM208); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_GC: + for (;;) + { + RMATCH(Fecode, RM209); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_PC: + for (;;) + { + RMATCH(Fecode, RM210); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + 
} + GETCHARINCTEST(fc, Feptr); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_SC: + for (;;) + { + RMATCH(Fecode, RM211); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_SCX: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM224); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_ALNUM: + for (;;) + { + int category; + RMATCH(Fecode, RM212); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. 
*/ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (;;) + { + RMATCH(Fecode, RM213); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + break; + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_WORD: + for (;;) + { + int chartype, category; + RMATCH(Fecode, RM214); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || + category == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_CLIST: + for (;;) + { + const uint32_t *cp; + RMATCH(Fecode, RM215); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (Lctype == OP_NOTPROP) continue; + RRETURN(MATCH_NOMATCH); + } +#endif + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { + if (Lctype == OP_NOTPROP) break; + RRETURN(MATCH_NOMATCH); + } + if (fc == *cp++) + { + if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + break; + } + } + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_UCNC: + for (;;) + { + RMATCH(Fecode, RM216); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + 
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_BIDICL: + for (;;) + { + RMATCH(Fecode, RM223); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + case PT_BOOL: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM222); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + + /* This should never occur */ + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. 
*/ + + else if (Lctype == OP_EXTUNI) + { + for (;;) + { + RMATCH(Fecode, RM217); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, + utf, NULL); + } + CHECK_PARTIAL(); + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* UTF mode for non-property testing character types. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (;;) + { + RMATCH(Fecode, RM218); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH); + GETCHARINC(fc, Feptr); + switch(Lctype) + { + case OP_ANY: /* This is the non-NL case */ + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + fc == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#ifndef EBCDIC + case 0x2028: + case 0x2029: +#endif /* Not EBCDIC */ + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) + RRETURN(MATCH_NOMATCH); + break; + } + break; + + case OP_NOT_HSPACE: + switch(fc) + { + HSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + break; + + case OP_HSPACE: + switch(fc) + { + HSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_VSPACE: + switch(fc) + { + VSPACE_CASES: RRETURN(MATCH_NOMATCH); + default: break; + } + break; + + case OP_VSPACE: 
+ switch(fc) + { + VSPACE_CASES: break; + default: RRETURN(MATCH_NOMATCH); + } + break; + + case OP_NOT_DIGIT: + if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + for (;;) + { + RMATCH(Fecode, RM33); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) + RRETURN(MATCH_NOMATCH); + fc = *Feptr++; + switch(Lctype) + { + case OP_ANY: /* This is the non-NL case */ + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + fc == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + break; + + case OP_ANYNL: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + + case CHAR_CR: + if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++; + break; + + case CHAR_LF: + break; + + case CHAR_VT: + case CHAR_FF: + case CHAR_NEL: +#if PCRE2_CODE_UNIT_WIDTH != 8 + case 0x2028: + case 0x2029: +#endif + if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) + RRETURN(MATCH_NOMATCH); + break; + } + 
break; + + case OP_NOT_HSPACE: + switch(fc) + { + default: break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_HSPACE: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + break; + } + break; + + case OP_NOT_VSPACE: + switch(fc) + { + default: break; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_VSPACE: + switch(fc) + { + default: RRETURN(MATCH_NOMATCH); + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + break; + } + break; + + case OP_NOT_DIGIT: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + } + } + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ + } + + /* If maximizing, it is worth using inline code for speed, doing the type + test once at the start (i.e. keep it out of the loops). Once again, + "notmatch" can be an ordinary local variable because the loops do not call + RMATCH. 
*/ + + else + { + Lstart_eptr = Feptr; /* Remember where we started */ + +#ifdef SUPPORT_UNICODE + if (proptype >= 0) + { + BOOL notmatch = Lctype == OP_NOTPROP; + switch(proptype) + { + case PT_LAMP: + for (i = Lmin; i < Lmax; i++) + { + int chartype; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + chartype = UCD_CHARTYPE(fc); + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) + break; + Feptr+= len; + } + break; + + case PT_GC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_PC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SCX: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) break; + Feptr+= len; + } + break; + + case PT_ALNUM: + for (i = Lmin; i < Lmax; i++) + { + int category; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + category = UCD_CATEGORY(fc); + if ((category == ucp_L || category == ucp_N) == notmatch) + break; + Feptr+= 
len; + } + break; + + /* Perl space used to exclude VT, but from Perl 5.18 it is included, + which means that Perl space and POSIX space are now identical. PCRE + was changed at release 8.34. */ + + case PT_SPACE: /* Perl space */ + case PT_PXSPACE: /* POSIX space */ + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + switch(fc) + { + HSPACE_CASES: + VSPACE_CASES: + if (notmatch) goto ENDLOOP99; /* Break the loop */ + break; + + default: + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) + goto ENDLOOP99; /* Break the loop */ + break; + } + Feptr+= len; + } + ENDLOOP99: + break; + + case PT_WORD: + for (i = Lmin; i < Lmax; i++) + { + int chartype, category; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || + category == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) + break; + Feptr+= len; + } + break; + + case PT_CLIST: + for (i = Lmin; i < Lmax; i++) + { + const uint32_t *cp; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (!notmatch) goto GOT_MAX; + } + else +#endif + { + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else goto GOT_MAX; } + if (fc == *cp++) + { if (notmatch) goto GOT_MAX; else break; } + } + } + + Feptr += len; + } + GOT_MAX: + break; + + case PT_UCNC: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || + fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || + fc >= 0xe000) == notmatch) + break; + Feptr 
+= len; + } + break; + + case PT_BIDICL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_BOOL: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) break; + Feptr+= len; + } + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + + /* Feptr is now past the end of the maximum run */ + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + + for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM221); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + if (utf) BACKCHAR(Feptr); + } + } + + /* Match extended Unicode grapheme clusters. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (Lctype == OP_EXTUNI) + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + else + { + GETCHARINCTEST(fc, Feptr); + Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, + utf, NULL); + } + CHECK_PARTIAL(); + } + + /* Feptr is now past the end of the maximum run */ + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start + of the run while backtracking because the use of \C in UTF mode can + cause BACKCHAR to move back past Lstart_eptr. 
This is just palliative; + the use of \C in UTF mode is fraught with danger. */ + + for(;;) + { + int lgb, rgb; + PCRE2_SPTR fptr; + + if (Feptr <= Lstart_eptr) break; /* At start of char run */ + RMATCH(Fecode, RM219); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + /* Backtracking over an extended grapheme cluster involves inspecting + the previous two characters (if present) to see if a break is + permitted between them. */ + + Feptr--; + if (!utf) fc = *Feptr; else + { + BACKCHAR(Feptr); + GETCHAR(fc, Feptr); + } + rgb = UCD_GRAPHBREAK(fc); + + for (;;) + { + if (Feptr <= Lstart_eptr) break; /* At start of char run */ + fptr = Feptr - 1; + if (!utf) fc = *fptr; else + { + BACKCHAR(fptr); + GETCHAR(fc, fptr); + } + lgb = UCD_GRAPHBREAK(fc); + if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; + Feptr = fptr; + rgb = lgb; + } + } + } + + else +#endif /* SUPPORT_UNICODE */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + switch(Lctype) + { + case OP_ANY: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(Feptr)) break; + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + break; + + case OP_ALLANY: + if (Lmax < UINT32_MAX) + { + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + Feptr++; + ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + } + } + else + { + Feptr = mb->end_subject; /* Unlimited UTF-8 repeat */ + SCHECK_PARTIAL(); + } + break; + + /* The "byte" (i.e. 
"code unit") case is the same as non-UTF */ + + case OP_ANYBYTE: + fc = Lmax - Lmin; + if (fc > (uint32_t)(mb->end_subject - Feptr)) + { + Feptr = mb->end_subject; + SCHECK_PARTIAL(); + } + else Feptr += fc; + break; + + case OP_ANYNL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc == CHAR_CR) + { + if (++Feptr >= mb->end_subject) break; + if (UCHAR21(Feptr) == CHAR_LF) Feptr++; + } + else + { + if (fc != CHAR_LF && + (mb->bsr_convention == PCRE2_BSR_ANYCRLF || + (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL +#ifndef EBCDIC + && fc != 0x2028 && fc != 0x2029 +#endif /* Not EBCDIC */ + ))) + break; + Feptr += len; + } + } + break; + + case OP_NOT_HSPACE: + case OP_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + BOOL gotspace; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + switch(fc) + { + HSPACE_CASES: gotspace = TRUE; break; + default: gotspace = FALSE; break; + } + if (gotspace == (Lctype == OP_NOT_HSPACE)) break; + Feptr += len; + } + break; + + case OP_NOT_VSPACE: + case OP_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + BOOL gotspace; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + switch(fc) + { + VSPACE_CASES: gotspace = TRUE; break; + default: gotspace = FALSE; break; + } + if (gotspace == (Lctype == OP_NOT_VSPACE)) break; + Feptr += len; + } + break; + + case OP_NOT_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break; + Feptr+= len; + } + break; + + case OP_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 ||(mb->ctypes[fc] & 
ctype_digit) == 0) break; + Feptr+= len; + } + break; + + case OP_NOT_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break; + Feptr+= len; + } + break; + + case OP_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break; + Feptr+= len; + } + break; + + case OP_NOT_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break; + Feptr+= len; + } + break; + + case OP_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLEN(fc, Feptr, len); + if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break; + Feptr+= len; + } + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go + too far. 
*/ + + for(;;) + { + if (Feptr <= Lstart_eptr) break; + RMATCH(Fecode, RM220); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + BACKCHAR(Feptr); + if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && + UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR) + Feptr--; + } + } + else +#endif /* SUPPORT_UNICODE */ + + /* Not UTF mode */ + { + switch(Lctype) + { + case OP_ANY: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(Feptr)) break; + if (mb->partial != 0 && /* Take care with CRLF partial */ + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + *Feptr == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Feptr++; + } + break; + + case OP_ALLANY: + case OP_ANYBYTE: + fc = Lmax - Lmin; + if (fc > (uint32_t)(mb->end_subject - Feptr)) + { + Feptr = mb->end_subject; + SCHECK_PARTIAL(); + } + else Feptr += fc; + break; + + case OP_ANYNL: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + fc = *Feptr; + if (fc == CHAR_CR) + { + if (++Feptr >= mb->end_subject) break; + if (*Feptr == CHAR_LF) Feptr++; + } + else + { + if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF || + (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL +#if PCRE2_CODE_UNIT_WIDTH != 8 + && fc != 0x2028 && fc != 0x2029 +#endif + ))) break; + Feptr++; + } + } + break; + + case OP_NOT_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: Feptr++; break; + HSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + goto ENDLOOP00; + } + } + ENDLOOP00: + break; + + case OP_HSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: goto ENDLOOP01; + HSPACE_BYTE_CASES: +#if 
PCRE2_CODE_UNIT_WIDTH != 8 + HSPACE_MULTIBYTE_CASES: +#endif + Feptr++; break; + } + } + ENDLOOP01: + break; + + case OP_NOT_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: Feptr++; break; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + goto ENDLOOP02; + } + } + ENDLOOP02: + break; + + case OP_VSPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + switch(*Feptr) + { + default: goto ENDLOOP03; + VSPACE_BYTE_CASES: +#if PCRE2_CODE_UNIT_WIDTH != 8 + VSPACE_MULTIBYTE_CASES: +#endif + Feptr++; break; + } + } + ENDLOOP03: + break; + + case OP_NOT_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0) + break; + Feptr++; + } + break; + + case OP_DIGIT: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0) + break; + Feptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0) + break; + Feptr++; + } + break; + + case OP_WHITESPACE: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0) + break; + Feptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0) + break; + Feptr++; + } + break; + + case OP_WORDCHAR: + for (i = Lmin; i < Lmax; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + 
if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0) + break; + Feptr++; + } + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + + if (reptype == REPTYPE_POS) continue; /* No backtracking */ + + for (;;) + { + if (Feptr == Lstart_eptr) break; + RMATCH(Fecode, RM34); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr--; + if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF && + Feptr[-1] == CHAR_CR) Feptr--; + } + } + } + break; /* End of repeat character type processing */ + +#undef Lstart_eptr +#undef Lmin +#undef Lmax +#undef Lctype +#undef Lpropvalue + + + /* ===================================================================== */ + /* Match a back reference, possibly repeatedly. Look past the end of the + item to see if there is repeat information following. The OP_REF and + OP_REFI opcodes are used for a reference to a numbered group or to a + non-duplicated named group. For a duplicated named group, OP_DNREF and + OP_DNREFI are used. In this case we must scan the list of groups to which + the name refers, and use the first one that is set. */ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Lcaseless F->temp_32[2] +#define Lcaseopts F->temp_32[3] +#define Lstart F->temp_sptr[0] +#define Loffset F->temp_size + + case OP_DNREF: + case OP_DNREFI: + Lcaseless = (Fop == OP_DNREFI); + Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0; + { + int count = GET2(Fecode, 1+IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0); + + while (count-- > 0) + { + Loffset = (GET2(slot, 0) << 1) - 2; + if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break; + slot += mb->name_entry_size; + } + } + goto REF_REPEAT; + + case OP_REF: + case OP_REFI: + Lcaseless = (Fop == OP_REFI); + Lcaseopts = (Fop == OP_REFI)? 
Fecode[1 + IMM2_SIZE] : 0; + Loffset = (GET2(Fecode, 1) << 1) - 2; + Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0); + + /* Set up for repetition, or handle the non-repeated case. The maximum and + minimum must be in the heap frame, but as they are short-term values, we + use temporary fields. */ + + REF_REPEAT: + switch (*Fecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + fc = *Fecode++ - OP_CRSTAR; + Lmin = rep_min[fc]; + Lmax = rep_max[fc]; + reptype = rep_typ[fc]; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + reptype = rep_typ[*Fecode - OP_CRSTAR]; + if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */ + Fecode += 1 + 2 * IMM2_SIZE; + break; + + default: /* No repeat follows */ + { + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + } + Feptr += length; + continue; /* With the main loop */ + } + + /* Handle repeated back references. If a set group has length zero, just + continue with the main loop, because it matches however many times. For an + unset reference, if the minimum is zero, we can also just continue. We can + also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset + group behave as a zero-length group. For any other unset cases, carrying + on will result in NOMATCH. */ + + if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) + { + if (Fovector[Loffset] == Fovector[Loffset + 1]) continue; + } + else /* Group is not set */ + { + if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) + continue; + } + + /* First, ensure the minimum number of matches are present. 
*/ + + for (i = 1; i <= Lmin; i++) + { + PCRE2_SIZE slength; + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += slength; + } + + /* If min = max, we are done. They are not both allowed to be zero. */ + + if (Lmin == Lmax) continue; + + /* If minimizing, keep trying and advancing the pointer. */ + + if (reptype == REPTYPE_MIN) + { + for (;;) + { + PCRE2_SIZE slength; + RMATCH(Fecode, RM20); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); + if (rrc != 0) + { + if (rrc > 0) Feptr = mb->end_subject; /* Partial match */ + CHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + Feptr += slength; + } + + PCRE2_UNREACHABLE(); /* Control never reaches here */ + } + + /* If maximizing, find the longest string and work backwards, as long as + the matched lengths for each iteration are the same. */ + + else + { + BOOL samelengths = TRUE; + Lstart = Feptr; /* Starting position */ + Flength = Fovector[Loffset+1] - Fovector[Loffset]; + + for (i = Lmin; i < Lmax; i++) + { + PCRE2_SIZE slength; + rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); + if (rrc != 0) + { + /* Can't use CHECK_PARTIAL because we don't want to update Feptr in + the soft partial matching case. */ + + if (rrc > 0 && mb->partial != 0 && + mb->end_subject > mb->start_used_ptr) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + break; + } + + if (slength != Flength) samelengths = FALSE; + Feptr += slength; + } + + /* If the length matched for each repetition is the same as the length of + the captured group, we can easily work backwards. This is the normal + case. However, in caseless UTF-8 mode there are pairs of case-equivalent + characters whose lengths (in terms of code units) differ. 
However, this + is very rare, so we handle it by re-matching fewer and fewer times. */ + + if (samelengths) + { + while (Feptr >= Lstart) + { + RMATCH(Fecode, RM21); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Feptr -= Flength; + } + } + + /* The rare case of non-matching lengths. Re-scan the repetition for each + iteration. We know that match_ref() will succeed every time. */ + + else + { + Lmax = i; + for (;;) + { + RMATCH(Fecode, RM22); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Feptr == Lstart) break; /* Failed after minimal repetition */ + Feptr = Lstart; + Lmax--; + for (i = Lmin; i < Lmax; i++) + { + PCRE2_SIZE slength; + (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength); + Feptr += slength; + } + } + } + + RRETURN(MATCH_NOMATCH); + } + + PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ + +#undef Lcaseless +#undef Lmin +#undef Lmax +#undef Lstart +#undef Loffset + + + +/* ========================================================================= */ +/* Opcodes for the start of various parenthesized items */ +/* ========================================================================= */ + + /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the + (*THEN) is within the current branch by comparing the address of OP_THEN + that is passed back with the end of the branch. If (*THEN) is within the + current branch, and the branch is one of two or more alternatives (it + either starts or ends with OP_ALT), we have reached the limit of THEN's + action, so convert the return code to NOMATCH, which will cause normal + backtracking to happen from now on. Otherwise, THEN is passed back to an + outer alternative. This implements Perl's treatment of parenthesized + groups, where a group not containing | does not affect the current + alternative, that is, (X) is NOT the same as (X|(*F)). 
*/ + + + /* ===================================================================== */ + /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive + bracket group, indicating that it may occur zero times. It may repeat + infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in + the pattern. Brackets with fixed upper repeat limits are compiled as a + number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO. + Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */ + +#define Lnext_ecode F->temp_sptr[0] + + case OP_BRAZERO: + Lnext_ecode = Fecode + 1; + RMATCH(Lnext_ecode, RM9); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT); + Fecode = Lnext_ecode + 1 + LINK_SIZE; + break; + + case OP_BRAMINZERO: + Lnext_ecode = Fecode + 1; + do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT); + RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode++; + break; + +#undef Lnext_ecode + + case OP_SKIPZERO: + Fecode++; + do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + break; + + + /* ===================================================================== */ + /* Handle possessive brackets with an unlimited repeat. The end of these + brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without + going further in the pattern. 
*/ + +#define Lframe_type F->temp_32[0] +#define Lmatched_once F->temp_32[1] +#define Lzero_allowed F->temp_32[2] +#define Lstart_eptr F->temp_sptr[0] +#define Lstart_group F->temp_sptr[1] + + case OP_BRAPOSZERO: + Lzero_allowed = TRUE; /* Zero repeat is allowed */ + Fecode += 1; + if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS) + goto POSSESSIVE_CAPTURE; + goto POSSESSIVE_NON_CAPTURE; + + case OP_BRAPOS: + case OP_SBRAPOS: + Lzero_allowed = FALSE; /* Zero repeat not allowed */ + + POSSESSIVE_NON_CAPTURE: + Lframe_type = GF_NOCAPTURE; /* Remembered frame type */ + goto POSSESSIVE_GROUP; + + case OP_CBRAPOS: + case OP_SCBRAPOS: + Lzero_allowed = FALSE; /* Zero repeat not allowed */ + + POSSESSIVE_CAPTURE: + number = GET2(Fecode, 1+LINK_SIZE); + Lframe_type = GF_CAPTURE | number; /* Remembered frame type */ + + POSSESSIVE_GROUP: + Lmatched_once = FALSE; /* Never matched */ + Lstart_group = Fecode; /* Start of this group */ + + for (;;) + { + Lstart_eptr = Feptr; /* Position at group start */ + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8); + if (rrc == MATCH_KETRPOS) + { + Lmatched_once = TRUE; /* Matched at least once */ + if (Feptr == Lstart_eptr) /* Empty match; skip to end */ + { + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + break; + } + + Fecode = Lstart_group; + continue; + } + + /* See comment above about handling THEN. 
*/ + + if (rrc == MATCH_THEN) + { + PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1); + if (mb->verb_ecode_ptr < next_ecode && + (*Fecode == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + } + + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) break; + } + + /* Success if matched something or zero repeat allowed */ + + if (Lmatched_once || Lzero_allowed) + { + Fecode += 1 + LINK_SIZE; + break; + } + + RRETURN(MATCH_NOMATCH); + +#undef Lmatched_once +#undef Lzero_allowed +#undef Lframe_type +#undef Lstart_eptr +#undef Lstart_group + + + /* ===================================================================== */ + /* Handle non-capturing brackets that cannot match an empty string. When we + get to the final alternative within the brackets, as long as there are no + THEN's in the pattern, we can optimize by not recording a new backtracking + point. (Ideally we should test for a THEN within this group, but we don't + have that information.) Don't do this if we are at the very top level, + however, because that would make handling assertions and once-only brackets + messier when there is nothing to go back to. */ + +#define Lframe_type F->temp_32[0] /* Set for all that use GROUPLOOP */ +#define Lnext_branch F->temp_sptr[0] /* Used only in OP_BRA handling */ + + case OP_BRA: + if (mb->hasthen || Frdepth == 0) + { + Lframe_type = 0; + goto GROUPLOOP; + } + + for (;;) + { + Lnext_branch = Fecode + GET(Fecode, 1); + if (*Lnext_branch != OP_ALT) break; + + /* This is never the final branch. We do not need to test for MATCH_THEN + here because this code is not used when there is a THEN in the pattern. */ + + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode = Lnext_branch; + } + + /* Hit the start of the final branch. Continue at this level. 
*/ + + Fecode += PRIV(OP_lengths)[*Fecode]; + break; + +#undef Lnext_branch + + + /* ===================================================================== */ + /* Handle a capturing bracket, other than those that are possessive with an + unlimited repeat. */ + + case OP_CBRA: + case OP_SCBRA: + Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE); + goto GROUPLOOP; + + + /* ===================================================================== */ + /* Atomic groups and non-capturing brackets that can match an empty string + must record a backtracking point and also set up a chained frame. */ + + case OP_ONCE: + case OP_SCRIPT_RUN: + case OP_SBRA: + Lframe_type = GF_NOCAPTURE | Fop; + + GROUPLOOP: + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2); + if (rrc == MATCH_THEN) + { + PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1); + if (mb->verb_ecode_ptr < next_ecode && + (*Fecode == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + } + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + +#undef Lframe_type + + + /* ===================================================================== */ + /* Pattern recursion either matches the current regex, or some + subexpression. The offset data is the offset to the starting bracket from + the start of the whole pattern. This is so that it works from duplicated + subpatterns. For a whole-pattern recursion, we have to infer the number + zero. */ + +#define Lframe_type F->temp_32[0] +#define Lstart_branch F->temp_sptr[0] + + case OP_RECURSE: + bracode = mb->start_code + GET(Fecode, 1); + number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE); + + /* If we are already in a pattern recursion, check for repeating the same + one without changing the subject pointer or the last referenced character + in the subject. 
This should catch convoluted mutual recursions; some + simple cases are caught at compile time. However, there are rare cases when + this check needs to be turned off. In this case, actual recursion loops + will be caught by the match or heap limits. */ + + if (Fcurrent_recurse != RECURSE_UNSET) + { + offset = Flast_group_offset; + while (offset != PCRE2_UNSET) + { + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + if (N->group_frame_type == (GF_RECURSE | number)) + { + if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used && + (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0) + return PCRE2_ERROR_RECURSELOOP; + break; + } + offset = P->last_group_offset; + } + } + + /* Remember the current last referenced character and then run the + recursion branch by branch. */ + + F->recurse_last_used = mb->last_used_ptr; + Lstart_branch = bracode; + Lframe_type = GF_RECURSE | number; + + for (;;) + { + PCRE2_SPTR next_ecode; + + group_frame_type = Lframe_type; + RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11); + next_ecode = Lstart_branch + GET(Lstart_branch,1); + + /* Handle backtracking verbs, which are defined in a range that can + easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to + escape beyond a recursion; they cause a NOMATCH for the entire recursion. + + When one of these verbs triggers, the current recursion group number is + recorded. If it matches the recursion we are processing, the verb + happened within the recursion and we must deal with it. Otherwise it must + have happened after the recursion completed, and so has to be passed + back. See comment above about handling THEN. 
*/ + + if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX && + mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE)) + { + if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode && + (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT)) + rrc = MATCH_NOMATCH; + else RRETURN(MATCH_NOMATCH); + } + + /* Note that carrying on after (*ACCEPT) in a recursion is handled in the + OP_ACCEPT code. Nothing needs to be done here. */ + + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Lstart_branch = next_ecode; + if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH); + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + +#undef Lframe_type +#undef Lstart_branch + + + /* ===================================================================== */ + /* Positive assertions are like other groups except that PCRE doesn't allow + the effect of (*THEN) to escape beyond an assertion; it is therefore + treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its + captures and mark retained. Any other return is an error. */ + +#define Lframe_type F->temp_32[0] + + case OP_ASSERT: + case OP_ASSERTBACK: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + Lframe_type = GF_NOCAPTURE | Fop; + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3); + if (rrc == MATCH_ACCEPT) + { + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; + break; + } + if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH); + } + + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + break; + +#undef Lframe_type + + + /* ===================================================================== */ + /* Handle negative assertions. 
Loop for each non-matching branch as for + positive assertions. */ + +#define Lframe_type F->temp_32[0] + + case OP_ASSERT_NOT: + case OP_ASSERTBACK_NOT: + Lframe_type = GF_NOCAPTURE | Fop; + + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4); + switch(rrc) + { + case MATCH_ACCEPT: /* Assertion matched, therefore it fails. */ + case MATCH_MATCH: + RRETURN (MATCH_NOMATCH); + + case MATCH_NOMATCH: /* Branch failed, try next if present. */ + case MATCH_THEN: + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED; + break; + + case MATCH_COMMIT: /* Assertion forced to fail, therefore continue. */ + case MATCH_SKIP: + case MATCH_PRUNE: + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + goto ASSERT_NOT_FAILED; + + default: /* Pass back any other return */ + RRETURN(rrc); + } + } + + /* None of the branches have matched or there was a backtrack to (*COMMIT), + (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a + negative assertion, so carry on. */ + + ASSERT_NOT_FAILED: + Fecode += 1 + LINK_SIZE; + break; + +#undef Lframe_type + + /* ===================================================================== */ + /* Handle scan substring operation. */ + +#define Lframe_type F->temp_32[0] +#define Lextra_size F->temp_32[1] +#define Lsaved_moptions F->temp_32[2] +#define Lsaved_end_subject F->temp_sptr[0] +#define Lsaved_eptr F->temp_sptr[1] +#define Ltrue_end_extra F->temp_size + + case OP_ASSERT_SCS: + { + PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE; + uint32_t extra_size = 0; + int count; + PCRE2_SPTR slot; + + /* Disable compiler warning. 
*/ + offset = 0; + (void)offset; + + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + offset = (GET2(ecode, 1) << 1) - 2; + ecode += 1+IMM2_SIZE; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + continue; + } + + if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH); + + count = GET2(ecode, 1 + IMM2_SIZE); + slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size; + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + + while (count > 0) + { + offset = (GET2(slot, 0) << 1) - 2; + if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET) + goto SCS_OFFSET_FOUND; + slot += mb->name_entry_size; + count--; + } + } + + SCS_OFFSET_FOUND: + + /* Skip remaining options. */ + for (;;) + { + if (*ecode == OP_CREF) + { + extra_size += 1+IMM2_SIZE; + ecode += 1+IMM2_SIZE; + } + else if (*ecode == OP_DNCREF) + { + extra_size += 1+2*IMM2_SIZE; + ecode += 1+2*IMM2_SIZE; + } + else break; + } + + Lextra_size = extra_size; + } + + Lsaved_end_subject = mb->end_subject; + Ltrue_end_extra = mb->true_end_subject - mb->end_subject; + Lsaved_eptr = Feptr; + Lsaved_moptions = mb->moptions; + + Feptr = mb->start_subject + Fovector[offset]; + mb->true_end_subject = mb->end_subject = + mb->start_subject + Fovector[offset + 1]; + mb->moptions &= ~PCRE2_NOTEOL; + + Lframe_type = GF_NOCAPTURE | Fop; + for (;;) + { + group_frame_type = Lframe_type; + RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38); + if (rrc == MATCH_ACCEPT) + { + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; + break; + } + + if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) + { + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(rrc); + } + + Fecode += GET(Fecode, 1); + if (*Fecode != OP_ALT) + 
{ + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; + RRETURN(MATCH_NOMATCH); + } + Lextra_size = 0; + } + + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + Fecode += 1 + LINK_SIZE; + Feptr = Lsaved_eptr; + break; + +#undef Lframe_type +#undef Lextra_size +#undef Lsaved_end_subject +#undef Lsaved_eptr +#undef Ltrue_end_extra +#undef Lsave_moptions + + /* ===================================================================== */ + /* The callout item calls an external function, if one is provided, passing + details of the match so far. This is mainly for debugging, though the + function is able to force a failure. */ + + case OP_CALLOUT: + case OP_CALLOUT_STR: + rrc = do_callout(F, mb, &length); + if (rrc > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + Fecode += length; + break; + + + /* ===================================================================== */ + /* Conditional group: compilation checked that there are no more than two + branches. If the condition is false, skipping the first branch takes us + past the end of the item if there is only one branch, but that's exactly + what we want. */ + + case OP_COND: + case OP_SCOND: + + /* The variable Flength will be added to Fecode when the condition is + false, to get to the second branch. Setting it to the offset to the ALT or + KET, then incrementing Fecode achieves this effect. However, if the second + branch is non-existent, we must point to the KET so that the end of the + group is correctly processed. We now have Fecode pointing to the condition + or callout. */ + + Flength = GET(Fecode, 1); /* Offset to the second branch */ + if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE; + Fecode += 1 + LINK_SIZE; /* From this opcode */ + + /* Because of the way auto-callout works during compile, a callout item is + inserted between OP_COND and an assertion condition. 
Such a callout can + also be inserted manually. */ + + if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR) + { + rrc = do_callout(F, mb, &length); + if (rrc > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + + /* Advance Fecode past the callout, so it now points to the condition. We + must adjust Flength so that the value of Fecode+Flength is unchanged. */ + + Fecode += length; + Flength -= length; + } + + /* Test the various possible conditions */ + + condition = FALSE; + switch(*Fecode) + { + case OP_RREF: /* Group recursion test */ + if (Fcurrent_recurse != RECURSE_UNSET) + { + number = GET2(Fecode, 1); + condition = (number == RREF_ANY || number == Fcurrent_recurse); + } + break; + + case OP_DNRREF: /* Duplicate named group recursion test */ + if (Fcurrent_recurse != RECURSE_UNSET) + { + int count = GET2(Fecode, 1 + IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + while (count-- > 0) + { + number = GET2(slot, 0); + condition = number == Fcurrent_recurse; + if (condition) break; + slot += mb->name_entry_size; + } + } + break; + + case OP_CREF: /* Numbered group used test */ + offset = (GET2(Fecode, 1) << 1) - 2; /* Doubled ref number */ + condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET; + break; + + case OP_DNCREF: /* Duplicate named group used test */ + { + int count = GET2(Fecode, 1 + IMM2_SIZE); + PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size; + while (count-- > 0) + { + offset = (GET2(slot, 0) << 1) - 2; + condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET; + if (condition) break; + slot += mb->name_entry_size; + } + } + break; + + case OP_FALSE: + case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */ + break; + + case OP_TRUE: + condition = TRUE; + break; + + /* The condition is an assertion. Run code similar to the assertion code + above. 
*/ + +#define Lpositive F->temp_32[0] +#define Lstart_branch F->temp_sptr[0] + + default: + Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK); + Lstart_branch = Fecode; + + for (;;) + { + group_frame_type = GF_CONDASSERT | *Fecode; + RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5); + + switch(rrc) + { + case MATCH_ACCEPT: /* Save captures */ + memcpy(Fovector, + (char *)assert_accept_frame + offsetof(heapframe, ovector), + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + + /* Fall through */ + /* In the case of a match, the captures have already been put into + the current frame. */ + + case MATCH_MATCH: + condition = Lpositive; /* TRUE for positive assertion */ + break; + + /* PCRE doesn't allow the effect of (*THEN) to escape beyond an + assertion; it is therefore always treated as NOMATCH. */ + + case MATCH_NOMATCH: + case MATCH_THEN: + Lstart_branch += GET(Lstart_branch, 1); + if (*Lstart_branch == OP_ALT) continue; /* Try next branch */ + condition = !Lpositive; /* TRUE for negative assertion */ + break; + + /* These force no match without checking other branches. */ + + case MATCH_COMMIT: + case MATCH_SKIP: + case MATCH_PRUNE: + condition = !Lpositive; + break; + + default: + RRETURN(rrc); + } + break; /* Out of the branch loop */ + } + + /* If the condition is true, find the end of the assertion so that + advancing past it gets us to the start of the first branch. */ + + if (condition) + { + do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT); + } + break; /* End of assertion condition */ + } + +#undef Lpositive +#undef Lstart_branch + + /* Choose branch according to the condition. */ + + Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength; + + /* If the opcode is OP_SCOND it means we are at a repeated conditional + group that might match an empty string. We must therefore descend a level + so that the start is remembered for checking. 
For OP_COND we can just + continue at this level. */ + + if (Fop == OP_SCOND) + { + group_frame_type = GF_NOCAPTURE | Fop; + RMATCH(Fecode, RM35); + RRETURN(rrc); + } + break; + + + +/* ========================================================================= */ +/* End of start of parenthesis opcodes */ +/* ========================================================================= */ + + + /* ===================================================================== */ + /* Move the subject pointer back by one fixed amount. This occurs at the + start of each branch that has a fixed length in a lookbehind assertion. If + we are too close to the start to move back, fail. When working with UTF-8 + we move back a number of characters, not bytes. */ + + case OP_REVERSE: + number = GET2(Fecode, 1); +#ifdef SUPPORT_UNICODE + if (utf) + { + /* We used to do a simpler `while (number-- > 0)` but that triggers + clang's unsigned integer overflow sanitizer. */ + while (number > 0) + { + --number; + if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH); + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + + /* No UTF support, or not in UTF mode: count is code unit count */ + + { + if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH); + Feptr -= number; + } + + /* Save the earliest consulted character, then skip to next opcode */ + + if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr; + Fecode += 1 + IMM2_SIZE; + break; + + + /* ===================================================================== */ + /* Move the subject pointer back by a variable amount. This occurs at the + start of each branch of a lookbehind assertion when the branch has a + variable, but limited, length. A loop is needed to try matching the branch + after moving back different numbers of characters. If we are too close to + the start to move back even the minimum amount, fail. When working with + UTF-8 we move back a number of characters, not bytes. 
*/ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Leptr F->temp_sptr[0] + + case OP_VREVERSE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + Leptr = Feptr; + + /* Move back by the maximum branch length and then work forwards. This + ensures that items such as \d{3,5} get the maximum length, which is + relevant for captures, and makes for Perl compatibility. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = 0; i < Lmax; i++) + { + if (Feptr == mb->start_subject) + { + if (i < Lmin) RRETURN(MATCH_NOMATCH); + Lmax = i; + break; + } + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + + /* No UTF support or not in UTF mode */ + + { + ptrdiff_t diff = Feptr - mb->start_subject; + uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0); + if (Lmin > available) RRETURN(MATCH_NOMATCH); + if (Lmax > available) Lmax = available; + Feptr -= Lmax; + } + + /* Now try matching, moving forward one character on failure, until we + reach the minimum back length. */ + + for (;;) + { + RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH); + Feptr++; +#ifdef SUPPORT_UNICODE + if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); } +#endif + } + PCRE2_UNREACHABLE(); /* Control never reaches here */ + +#undef Lmin +#undef Lmax +#undef Leptr + + /* ===================================================================== */ + /* An alternation is the end of a branch; scan along to find the end of the + bracketed group. */ + + case OP_ALT: + branch_end = Fecode; + do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT); + break; + + + /* ===================================================================== */ + /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the + starting frame was added to the chained frames in order to remember the + starting subject position for the group. 
(Not true for OP_BRA when it's a + whole pattern recursion, but that is handled separately below.)*/ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: + case OP_KETRPOS: + + bracode = Fecode - GET(Fecode, 1); + + if (branch_end == NULL) branch_end = Fecode; + branch_start = bracode; + while (branch_start + GET(branch_start, 1) != branch_end) + branch_start += GET(branch_start, 1); + branch_end = NULL; + + /* Point N to the frame at the start of the most recent group, and P to its + predecessor. Remember the subject pointer at the start of the group. */ + + if (*bracode != OP_BRA && *bracode != OP_COND) + { + N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset); + P = (heapframe *)((char *)N - frame_size); + Flast_group_offset = P->last_group_offset; + +#ifdef DEBUG_SHOW_RMATCH + fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n", + N->rdepth, N->group_frame_type, + (char *)P->eptr - (char *)mb->start_subject); +#endif + + /* If we are at the end of an assertion that is a condition, first check + to see if we are at the end of a variable-length branch in a lookbehind. + If this is the case and we have not landed on the current character, + return no match. Compare code below for non-condition lookbehinds. In + other cases, return a match, discarding any intermediate backtracking + points. Copy back the mark setting and the captures into the frame before + N so that they are set on return. Doing this for all assertions, both + positive and negative, seems to match what Perl does. 
*/ + + if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT) + { + if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) && + branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + memcpy((char *)P + offsetof(heapframe, ovector), Fovector, + Foffset_top * sizeof(PCRE2_SIZE)); + P->offset_top = Foffset_top; + P->mark = Fmark; + Fback_frame = (char *)F - (char *)P; + RRETURN(MATCH_MATCH); + } + } + else P = NULL; /* Indicates starting frame not recorded */ + + /* The group was not a conditional assertion. */ + + switch (*bracode) + { + /* Whole pattern recursion is handled as a recursion into group 0, but + the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing + group - a design mistake: it should perhaps have been capture group 0. + Anyway, that means the end of such recursion must be handled here. It is + detected by checking for an immediately following OP_END when we are + recursing in group 0. If this is not the end of a whole-pattern + recursion, there is nothing to be done. */ + + case OP_BRA: + if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break; + + /* It is the end of whole-pattern recursion. */ + + offset = Flast_group_offset; + + /* Corrupted heapframes?. Trigger an assert and return an error */ + PCRE2_ASSERT(offset != PCRE2_UNSET); + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + Flast_group_offset = P->last_group_offset; + + /* Reinstate the previous set of captures and then carry on after the + recursion call. 
*/ + + memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, + Foffset_top * sizeof(PCRE2_SIZE)); + Foffset_top = P->offset_top; + Fcapture_last = P->capture_last; + Fcurrent_recurse = P->current_recurse; + Fecode = P->ecode + 1 + LINK_SIZE; + continue; /* With next opcode */ + + case OP_COND: /* No need to do anything for these */ + case OP_SCOND: + break; + + /* Non-atomic positive assertions are like OP_BRA, except that the + subject pointer must be put back to where it was at the start of the + assertion. For a variable lookbehind, check its end point. */ + + case OP_ASSERTBACK_NA: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NA: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + Feptr = P->eptr; + break; + + /* Atomic positive assertions are like OP_ONCE, except that in addition + the subject pointer must be put back to where it was at the start of the + assertion. For a variable lookbehind, check its end point. */ + + case OP_ASSERTBACK: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT: + if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; + Feptr = P->eptr; + /* Fall through */ + + /* For an atomic group, discard internal backtracking points. We must + also ensure that any remaining branches within the top-level of the group + are not tried. Do this by adjusting the code pointer within the backtrack + frame so that it points to the final branch. */ + + case OP_ONCE: + Fback_frame = ((char *)F - (char *)P); + for (;;) + { + uint32_t y = GET(P->ecode,1); + if ((P->ecode)[y] != OP_ALT) break; + P->ecode += y; + } + break; + + /* A matching negative assertion returns MATCH, which is turned into + NOMATCH at the assertion level. For a variable lookbehind, check its end + point. 
*/ + + case OP_ASSERTBACK_NOT: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NOT: + RRETURN(MATCH_MATCH); + + /* A scan substring group must preserve the current end_subject, + and restore it before the backtracking is performed into its sub + pattern. */ + + case OP_ASSERT_SCS: + F->temp_sptr[0] = mb->end_subject; + mb->end_subject = P->temp_sptr[0]; + mb->true_end_subject = mb->end_subject + P->temp_size; + Feptr = P->temp_sptr[1]; + + RMATCH(Fecode + 1 + LINK_SIZE, RM39); + + mb->end_subject = F->temp_sptr[0]; + mb->true_end_subject = mb->end_subject; + RRETURN(rrc); + break; + + /* At the end of a script run, apply the script-checking rules. This code + will never by exercised if Unicode support it not compiled, because in + that environment script runs cause an error at compile time. */ + + case OP_SCRIPT_RUN: + if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH); + break; + + /* Whole-pattern recursion is coded as a recurse into group 0, and is + handled with OP_BRA above. Other recursion is handled here. */ + + case OP_CBRA: + case OP_CBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + number = GET2(bracode, 1+LINK_SIZE); + + /* Handle a recursively called group. We reinstate the previous set of + captures and then carry on after the recursion call. */ + + if (Fcurrent_recurse == number) + { + P = (heapframe *)((char *)N - frame_size); + memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, + Foffset_top * sizeof(PCRE2_SIZE)); + Foffset_top = P->offset_top; + Fcapture_last = P->capture_last; + Fcurrent_recurse = P->current_recurse; + Fecode = P->ecode + 1 + LINK_SIZE; + continue; /* With next opcode */ + } + + /* Deal with actual capturing. 
*/ + + offset = (number << 1) - 2; + Fcapture_last = number; + Fovector[offset] = P->eptr - mb->start_subject; + Fovector[offset+1] = Feptr - mb->start_subject; + if (offset >= Foffset_top) Foffset_top = offset + 2; + break; + } /* End actions relating to the starting opcode */ + + /* OP_KETRPOS is a possessive repeating ket. Remember the current position, + and return the MATCH_KETRPOS. This makes it possible to do the repeats one + at a time from the outer level. This must precede the empty string test - + in this case that test is done at the outer level. */ + + if (*Fecode == OP_KETRPOS) + { + memcpy((char *)P + offsetof(heapframe, eptr), + (char *)F + offsetof(heapframe, eptr), + frame_copy_size); + RRETURN(MATCH_KETRPOS); + } + + /* Handle the different kinds of closing brackets. A non-repeating ket + needs no special action, just continuing at this level. This also happens + for the repeating kets if the group matched no characters, in order to + forcibly break infinite loops. Otherwise, the repeating kets try the rest + of the pattern or restart from the preceding bracket, in the appropriate + order. */ + + if (Fop != OP_KET && (P == NULL || Feptr != P->eptr)) + { + if (Fop == OP_KETRMIN) + { + RMATCH(Fecode + 1 + LINK_SIZE, RM6); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + Fecode -= GET(Fecode, 1); + break; /* End of ket processing */ + } + + /* Repeat the maximum number of times (KETRMAX) */ + + RMATCH(bracode, RM7); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + + /* Carry on at this level for a non-repeating ket, or after matching an + empty string, or after repeating for a maximum number of times. */ + + Fecode += 1 + LINK_SIZE; + break; + + + /* ===================================================================== */ + /* Start and end of line assertions, not multiline mode. */ + + case OP_CIRC: /* Start of line, unless PCRE2_NOTBOL is set. 
*/ + if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + case OP_SOD: /* Unconditional start of subject */ + if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + /* When PCRE2_NOTEOL is unset, assert before the subject end, or a + terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */ + + case OP_DOLL: + if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH); + if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; + + /* Fall through */ + /* Unconditional end of subject assertion (\z). */ + + case OP_EOD: + if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH); + if (mb->partial != 0) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Fecode++; + break; + + /* End of subject or ending \n assertion (\Z) */ + + case OP_EODN: + ASSERT_NL_OR_EOS: + if (Feptr < mb->true_end_subject && + (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen)) + { + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + RRETURN(MATCH_NOMATCH); + } + + /* Either at end of string or \n before end. */ + + if (mb->partial != 0) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + Fecode++; + break; + + + /* ===================================================================== */ + /* Start and end of line assertions, multiline mode. */ + + /* Start of subject unless notbol, or after any newline except for one at + the very end, unless PCRE2_ALT_CIRCUMFLEX is set. 
*/ + + case OP_CIRCM: + if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject) + RRETURN(MATCH_NOMATCH); + if (Feptr != mb->start_subject && + ((Feptr == mb->end_subject && + (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) || + !WAS_NEWLINE(Feptr))) + RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + /* Assert before any newline, or before end of subject unless noteol is + set. */ + + case OP_DOLLM: + if (Feptr < mb->end_subject) + { + if (!IS_NEWLINE(Feptr)) + { + if (mb->partial != 0 && + Feptr + 1 >= mb->end_subject && + NLBLOCK->nltype == NLTYPE_FIXED && + NLBLOCK->nllen == 2 && + UCHAR21TEST(Feptr) == NLBLOCK->nl[0]) + { + mb->hitend = TRUE; + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; + } + RRETURN(MATCH_NOMATCH); + } + } + else + { + if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH); + SCHECK_PARTIAL(); + } + Fecode++; + break; + + + /* ===================================================================== */ + /* Start of match assertion */ + + case OP_SOM: + if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH); + Fecode++; + break; + + + /* ===================================================================== */ + /* Reset the start of match point */ + + case OP_SET_SOM: + Fstart_match = Feptr; + Fecode++; + break; + + + /* ===================================================================== */ + /* Word boundary assertions. Find out if the previous and current + characters are "word" characters. It takes a bit more work in UTF mode. + Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is + not set. When it is set, use Unicode properties if available, even when not + in UTF mode. Remember the earliest and latest consulted characters. 
*/ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + if (Feptr == mb->check_subject) prev_is_word = FALSE; else + { + PCRE2_SPTR lastptr = Feptr - 1; +#ifdef SUPPORT_UNICODE + if (utf) + { + BACKCHAR(lastptr); + GETCHAR(fc, lastptr); + } + else +#endif /* SUPPORT_UNICODE */ + fc = *lastptr; + if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr; +#ifdef SUPPORT_UNICODE + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + prev_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif /* SUPPORT_UNICODE */ + prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0; + } + + /* Get status of next character */ + + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + cur_is_word = FALSE; + } + else + { + PCRE2_SPTR nextptr = Feptr + 1; +#ifdef SUPPORT_UNICODE + if (utf) + { + FORWARDCHARTEST(nextptr, mb->end_subject); + GETCHAR(fc, Feptr); + } + else +#endif /* SUPPORT_UNICODE */ + fc = *Feptr; + if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; +#ifdef SUPPORT_UNICODE + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) + { + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + cur_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); + } + else +#endif /* SUPPORT_UNICODE */ + cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0; + } + + /* Now see if the situation is what we want */ + + if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)? + cur_is_word == prev_is_word : cur_is_word != prev_is_word) + RRETURN(MATCH_NOMATCH); + break; + + + /* ===================================================================== */ + /* Backtracking (*VERB)s, with and without arguments. 
Note that if the + pattern is successfully matched, we do not come back from RMATCH. */ + + case OP_MARK: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12); + + /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an + argument, and we must check whether that argument matches this MARK's + argument. It is passed back in mb->verb_skip_ptr. If it does match, we + return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject + position that corresponds to this mark. Otherwise, pass back the return + code unaltered. */ + + if (rrc == MATCH_SKIP_ARG && + PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0) + { + mb->verb_skip_ptr = Feptr; /* Pass back current position */ + RRETURN(MATCH_SKIP); + } + RRETURN(rrc); + + case OP_FAIL: + RRETURN(MATCH_NOMATCH); + + /* Record the current recursing group number in mb->verb_current_recurse + when a backtracking return such as MATCH_COMMIT is given. This enables the + recurse processing to catch verbs from within the recursion. 
*/ + + case OP_COMMIT: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_COMMIT); + + case OP_COMMIT_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_COMMIT); + + case OP_PRUNE: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_PRUNE); + + case OP_PRUNE_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_PRUNE); + + case OP_SKIP: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_skip_ptr = Feptr; /* Pass back current position */ + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_SKIP); + + /* Note that, for Perl compatibility, SKIP with an argument does NOT set + nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was + not a matching mark, we have to re-run the match, ignoring the SKIP_ARG + that failed and any that precede it (either they also failed, or were not + triggered). To do this, we maintain a count of executed SKIP_ARGs. If a + SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg + set to the count of the one that failed. */ + + case OP_SKIP_ARG: + mb->skip_arg_count++; + if (mb->skip_arg_count <= mb->ignore_skip_arg) + { + Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1]; + break; + } + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + /* Pass back the current skip name and return the special MATCH_SKIP_ARG + return code. 
This will either be caught by a matching MARK, or get to the + top, where it causes a rematch with mb->ignore_skip_arg set to the value of + mb->skip_arg_count. */ + + mb->verb_skip_ptr = Fecode + 2; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_SKIP_ARG); + + /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that + the branch in which it occurs can be determined. */ + + case OP_THEN: + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_ecode_ptr = Fecode; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_THEN); + + case OP_THEN_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_ecode_ptr = Fecode; + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_THEN); + + + /* ===================================================================== */ + /* There's been some horrible disaster. Arrival here can only mean there is + something seriously wrong in the code above or the OP_xxx definitions. */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + + /* Do not insert any code in here without much thought; it is assumed + that "continue" in the code above comes out to here to repeat the main + loop. */ + + } /* End of main loop */ + +PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */ + +/* ========================================================================= */ +/* The RRETURN() macro jumps here. The number that is saved in Freturn_id +indicates which label we actually want to return to. The value in Frdepth is +the index number of the frame in the vector. The return value has been placed +in rrc. 
*/ + +#define LBL(val) case val: goto L_RM##val; + +RETURN_SWITCH: +if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; +if (Frdepth == 0) return rrc; /* Exit from the top level */ +F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */ +mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ + +#ifdef DEBUG_SHOW_RMATCH +fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id); +#endif + +switch (Freturn_id) + { + LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) + LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) + LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) + LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) + LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) + +#ifdef SUPPORT_WIDE_CHARS + LBL(100) LBL(101) LBL(102) LBL(103) +#endif + +#ifdef SUPPORT_UNICODE + LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) + LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) + LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) + LBL(221) LBL(222) LBL(223) LBL(224) +#endif + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } +#undef LBL +} + + +/************************************************* +* Match a Regular Expression * +*************************************************/ + +/* This function applies a compiled pattern to a subject string and picks out +portions of the string if it matches. Two elements in the vector are set for +each substring: the offsets to the start and end of the substring. 
+ +Arguments: + code points to the compiled expression + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + match_data points to a match_data block + mcontext points to a PCRE2 context + +Returns: > 0 => success; value is the number of ovector pairs filled + = 0 => success, but ovector is not big enough + = -1 => failed to match (PCRE2_ERROR_NOMATCH) + = -2 => partial match (PCRE2_ERROR_PARTIAL) + < -2 => some kind of unexpected problem +*/ + +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, + PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext) +{ +int rc; +int was_zero_terminated = 0; +const uint8_t *start_bits = NULL; +const pcre2_real_code *re = (const pcre2_real_code *)code; + +BOOL anchored; +BOOL firstline; +BOOL has_first_cu = FALSE; +BOOL has_req_cu = FALSE; +BOOL startline; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +PCRE2_SPTR memchr_found_first_cu; +PCRE2_SPTR memchr_found_first_cu2; +#endif + +PCRE2_UCHAR first_cu = 0; +PCRE2_UCHAR first_cu2 = 0; +PCRE2_UCHAR req_cu = 0; +PCRE2_UCHAR req_cu2 = 0; + +PCRE2_SPTR bumpalong_limit; +PCRE2_SPTR end_subject; +PCRE2_SPTR true_end_subject; +PCRE2_SPTR start_match; +PCRE2_SPTR req_cu_ptr; +PCRE2_SPTR start_partial; +PCRE2_SPTR match_partial; + +#ifdef SUPPORT_JIT +BOOL use_jit; +#endif + +/* This flag is needed even when Unicode is not supported for convenience +(it is used by the IS_NEWLINE macro).
*/ + +BOOL utf = FALSE; + +#ifdef SUPPORT_UNICODE +BOOL ucp = FALSE; +BOOL allow_invalid; +uint32_t fragment_options = 0; +#ifdef SUPPORT_JIT +BOOL jit_checked_utf = FALSE; +#endif +#endif /* SUPPORT_UNICODE */ + +PCRE2_SIZE frame_size; +PCRE2_SIZE heapframes_size; + +/* We need to have mb as a pointer to a match block, because the IS_NEWLINE +macro is used below, and it expects NLBLOCK to be defined as a pointer. */ + +pcre2_callout_block cb; +match_block actual_match_block; +match_block *mb = &actual_match_block; + +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; + +start_match = subject + start_offset; +req_cu_ptr = start_match - 1; +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } +true_end_subject = end_subject = subject + length; + +if (start_offset > length) return PCRE2_ERROR_BADOFFSET; + +/* Check that the first field in the block is the magic number. */ + +if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + +/* Check the code unit width. */ + +if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) + return PCRE2_ERROR_BADMODE; + +/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the +options variable for this function. Users of PCRE2 who are not calling the +function directly would like to have a way of setting these flags, in the same +way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with +constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and +(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now +transfer to the options for this function. The bits are guaranteed to be +adjacent, but do not have the same values.
This bit of Boolean trickery assumes +that the match-time bits are not more significant than the flag bits. If by +accident this is not the case, a compile-time division by zero error will +occur. */ + +#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) +#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) +options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1))); +#undef FF +#undef OO + +/* If the pattern was successfully studied with JIT support, we will run the +JIT executable instead of the rest of this function. Most options must be set +at compile time for the JIT code to be usable. */ + +#ifdef SUPPORT_JIT +use_jit = (re->executable_jit != NULL && + (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0); +#endif + +/* Initialize UTF/UCP parameters. */ + +#ifdef SUPPORT_UNICODE +utf = (re->overall_options & PCRE2_UTF) != 0; +allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0; +ucp = (re->overall_options & PCRE2_UCP) != 0; +#endif /* SUPPORT_UNICODE */ + +/* Convert the partial matching flags into an integer. */ + +mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 : + ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0; + +/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same +time. */ + +if (mb->partial != 0 && + ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) + return PCRE2_ERROR_BADOPTION; + +/* It is an error to set an offset limit without setting the flag at compile +time. */ + +if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET && + (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) + return PCRE2_ERROR_BADOFFSETLIMIT; + +/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, +free the memory that was obtained. Set the field to NULL for no match cases. 
*/ + +if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + { + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + } +match_data->subject = NULL; + +/* Zero the error offset in case the first code unit is invalid UTF. */ + +match_data->startchar = 0; + + +/* ============================= JIT matching ============================== */ + +/* Prepare for JIT matching. Check a UTF string for validity unless no check is +requested or invalid UTF can be handled. We check only the portion of the +subject that might be inspected during matching - from the offset minus the +maximum lookbehind to the given length. This saves time when a small part of a +large subject is being matched by the use of a starting offset. Note that the +maximum lookbehind is a number of characters, not code units. */ + +#ifdef SUPPORT_JIT +if (use_jit) + { +#ifdef SUPPORT_UNICODE + if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid) + { + + /* For 8-bit and 16-bit UTF, check that the first code unit is a valid + character start. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; +#if PCRE2_CODE_UNIT_WIDTH == 8 + return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ +#else + return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ +#endif + } +#endif /* WIDTH != 32 */ + + /* Move back by the maximum lookbehind, just in case it happens at the very + start of matching.
*/ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--) + { + start_match--; + while (start_match > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*start_match & 0xc0) == 0x80) +#else /* 16-bit */ + (*start_match & 0xfc00) == 0xdc00) +#endif + start_match--; + } +#else /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* In the 32-bit library, one code unit equals one character. However, + we cannot just subtract the lookbehind and then compare pointers, because + a very large lookbehind could create an invalid pointer. */ + + if (start_offset >= re->max_lookbehind) + start_match -= re->max_lookbehind; + else + start_match = subject; +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* Validate the relevant portion of the subject. Adjust the offset of an + invalid code point to be an absolute offset in the whole string. */ + + match_data->rc = PRIV(valid_utf)(start_match, + length - (start_match - subject), &(match_data->startchar)); + if (match_data->rc != 0) + { + match_data->startchar += start_match - subject; + return match_data->rc; + } + jit_checked_utf = TRUE; + } +#endif /* SUPPORT_UNICODE */ + + /* If JIT returns BADOPTION, which means that the selected complete or + partial matching mode was not compiled, fall through to the interpreter. 
*/ + + rc = pcre2_jit_match(code, subject, length, start_offset, options, + match_data, mcontext); + if (rc != PCRE2_ERROR_JIT_BADOPTION) + { + match_data->subject_length = length; + if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy((void *)match_data->subject, subject, length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } + return rc; + } + } +#endif /* SUPPORT_JIT */ + +/* ========================= End of JIT matching ========================== */ + + +/* Proceed with non-JIT matching. The default is to allow lookbehinds to the +start of the subject. A UTF check when there is a non-zero offset may change +this. */ + +mb->check_subject = subject; + +/* If a UTF subject string was not checked for validity in the JIT code above, +check it here, and handle support for invalid UTF strings. The check above +happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset. +If we get here in those circumstances, it means the subject string is valid, +but for some reason JIT matching was not successful. There is no need to check +the subject again. + +We check only the portion of the subject that might be inspected during +matching - from the offset minus the maximum lookbehind to the given length. +This saves time when a small part of a large subject is being matched by the +use of a starting offset. Note that the maximum lookbehind is a number of +characters, not code units. + +Note also that support for invalid UTF forces a check, overriding the setting +of PCRE2_NO_UTF_CHECK.
*/ + +#ifdef SUPPORT_UNICODE +if (utf && +#ifdef SUPPORT_JIT + !jit_checked_utf && +#endif + ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid)) + { +#if PCRE2_CODE_UNIT_WIDTH != 32 + BOOL skipped_bad_start = FALSE; +#endif + + /* For 8-bit and 16-bit UTF, check that the first code unit is a valid + character start. If we are handling invalid UTF, just skip over such code + units. Otherwise, give an appropriate error. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (allow_invalid) + { + while (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + start_match++; + skipped_bad_start = TRUE; + } + } + else if (start_match < end_subject && NOT_FIRSTCU(*start_match)) + { + if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; +#if PCRE2_CODE_UNIT_WIDTH == 8 + return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ +#else + return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ +#endif + } +#endif /* WIDTH != 32 */ + + /* The mb->check_subject field points to the start of UTF checking; + lookbehinds can go back no further than this. */ + + mb->check_subject = start_match; + + /* Move back by the maximum lookbehind, just in case it happens at the very + start of matching, but don't do this if we skipped bad 8-bit or 16-bit code + units above. */ + +#if PCRE2_CODE_UNIT_WIDTH != 32 + if (!skipped_bad_start) + { + unsigned int i; + for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--) + { + mb->check_subject--; + while (mb->check_subject > subject && +#if PCRE2_CODE_UNIT_WIDTH == 8 + (*mb->check_subject & 0xc0) == 0x80) +#else /* 16-bit */ + (*mb->check_subject & 0xfc00) == 0xdc00) +#endif + mb->check_subject--; + } + } +#else /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* In the 32-bit library, one code unit equals one character. However, + we cannot just subtract the lookbehind and then compare pointers, because + a very large lookbehind could create an invalid pointer. 
*/ + + if (start_offset >= re->max_lookbehind) + mb->check_subject -= re->max_lookbehind; + else + mb->check_subject = subject; +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + + /* Validate the relevant portion of the subject. There's a loop in case we + encounter bad UTF in the characters preceding start_match which we are + scanning because of a lookbehind. */ + + for (;;) + { + match_data->rc = PRIV(valid_utf)(mb->check_subject, + length - (mb->check_subject - subject), &(match_data->startchar)); + + if (match_data->rc == 0) break; /* Valid UTF string */ + + /* Invalid UTF string. Adjust the offset to be an absolute offset in the + whole string. If we are handling invalid UTF strings, set end_subject to + stop before the bad code unit, and set the options to "not end of line". + Otherwise return the error. */ + + match_data->startchar += mb->check_subject - subject; + if (!allow_invalid || match_data->rc > 0) return match_data->rc; + end_subject = subject + match_data->startchar; + + /* If the end precedes start_match, it means there is invalid UTF in the + extra code units we reversed over because of a lookbehind. Advance past the + first bad code unit, and then skip invalid character starting code units in + 8-bit and 16-bit modes, and try again with the original end point. */ + + if (end_subject < start_match) + { + mb->check_subject = end_subject + 1; +#if PCRE2_CODE_UNIT_WIDTH != 32 + while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject)) + mb->check_subject++; +#endif + end_subject = true_end_subject; + } + + /* Otherwise, set the not end of line option, and do the match. */ + + else + { + fragment_options = PCRE2_NOTEOL; + break; + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* A NULL match context means "use a default context", but we take the memory +control functions from the pattern. 
*/ + +if (mcontext == NULL) + { + mcontext = (pcre2_match_context *)(&PRIV(default_match_context)); + mb->memctl = re->memctl; + } +else mb->memctl = mcontext->memctl; + +anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; +firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0; +startline = (re->flags & PCRE2_STARTLINE) != 0; +bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)? + true_end_subject : subject + mcontext->offset_limit; + +/* Initialize and set up the fixed fields in the callout block, with a pointer +in the match block. */ + +mb->cb = &cb; +cb.version = 2; +cb.subject = subject; +cb.subject_length = (PCRE2_SIZE)(end_subject - subject); +cb.callout_flags = 0; + +/* Fill in the remaining fields in the match block, except for moptions, which +gets set later. */ + +mb->callout = mcontext->callout; +mb->callout_data = mcontext->callout_data; + +mb->start_subject = subject; +mb->start_offset = start_offset; +mb->end_subject = end_subject; +mb->true_end_subject = true_end_subject; +mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; +mb->allowemptypartial = (re->max_lookbehind > 0) || + (re->flags & PCRE2_MATCH_EMPTY) != 0; +mb->poptions = re->overall_options; /* Pattern options */ +mb->ignore_skip_arg = 0; +mb->mark = mb->nomatch_mark = NULL; /* In case never set */ + +/* The name table is needed for finding all the numbers associated with a +given name, for condition testing. The code follows the name table. */ + +mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)); +mb->name_count = re->name_count; +mb->name_entry_size = re->name_entry_size; +mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start); + +/* Process the \R and newline settings. 
*/ + +mb->bsr_convention = re->bsr_convention; +mb->nltype = NLTYPE_FIXED; +switch(re->newline_convention) + { + case PCRE2_NEWLINE_CR: + mb->nllen = 1; + mb->nl[0] = CHAR_CR; + break; + + case PCRE2_NEWLINE_LF: + mb->nllen = 1; + mb->nl[0] = CHAR_NL; + break; + + case PCRE2_NEWLINE_NUL: + mb->nllen = 1; + mb->nl[0] = CHAR_NUL; + break; + + case PCRE2_NEWLINE_CRLF: + mb->nllen = 2; + mb->nl[0] = CHAR_CR; + mb->nl[1] = CHAR_NL; + break; + + case PCRE2_NEWLINE_ANY: + mb->nltype = NLTYPE_ANY; + break; + + case PCRE2_NEWLINE_ANYCRLF: + mb->nltype = NLTYPE_ANYCRLF; + break; + + default: + PCRE2_DEBUG_UNREACHABLE(); + return PCRE2_ERROR_INTERNAL; + } + +/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE +vector at the end, whose size depends on the number of capturing parentheses in +the pattern. It is not used at all if there are no capturing parentheses. + + frame_size is the total size of each frame + match_data->heapframes is the pointer to the frames vector + match_data->heapframes_size is the allocated size of the vector + +We must pad the frame_size for alignment to ensure subsequent frames are as +aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE +array, that does not guarantee it is suitably aligned for pointers, as some +architectures have pointers that are larger than a size_t. */ + +frame_size = (offsetof(heapframe, ovector) + + re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) & + ~(HEAPFRAME_ALIGNMENT - 1); + +/* Limits set in the pattern override the match context only if they are +smaller. */ + +mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)? + mcontext->heap_limit : re->limit_heap); + +mb->match_limit = (mcontext->match_limit < re->limit_match)? + mcontext->match_limit : re->limit_match; + +mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? 
+ mcontext->depth_limit : re->limit_depth; + +/* If a pattern has very many capturing parentheses, the frame size may be very +large. Set the initial frame vector size to ensure that there are at least 10 +available frames, but enforce a minimum of START_FRAMES_SIZE. If this is +greater than the heap limit, get as large a vector as possible. */ + +heapframes_size = frame_size * 10; +if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE; +if (heapframes_size / 1024 > mb->heap_limit) + { + PCRE2_SIZE max_size = 1024 * mb->heap_limit; + if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT; + heapframes_size = max_size; + } + +/* If an existing frame vector in the match_data block is large enough, we can +use it. Otherwise, free any pre-existing vector and get a new one. */ + +if (match_data->heapframes_size < heapframes_size) + { + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); + match_data->heapframes = match_data->memctl.malloc(heapframes_size, + match_data->memctl.memory_data); + if (match_data->heapframes == NULL) + { + match_data->heapframes_size = 0; + return PCRE2_ERROR_NOMEMORY; + } + match_data->heapframes_size = heapframes_size; + } + +/* Write to the ovector within the first frame to mark every capture unset and +to avoid uninitialized memory read errors when it is copied to a new frame. */ + +memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff, + frame_size - offsetof(heapframe, ovector)); + +/* Pointers to the individual character tables */ + +mb->lcc = re->tables + lcc_offset; +mb->fcc = re->tables + fcc_offset; +mb->ctypes = re->tables + ctypes_offset; + +/* Set up the first code unit to match, if available. If there's no first code +unit there may be a bitmap of possible first characters. 
*/ + +if ((re->flags & PCRE2_FIRSTSET) != 0) + { + has_first_cu = TRUE; + first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit); + if ((re->flags & PCRE2_FIRSTCASELESS) != 0) + { + first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu); +#else + if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } +else + if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0) + start_bits = re->start_bitmap; + +/* There may also be a "last known required character" set. */ + +if ((re->flags & PCRE2_LASTSET) != 0) + { + has_req_cu = TRUE; + req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit); + if ((re->flags & PCRE2_LASTCASELESS) != 0) + { + req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu); +#ifdef SUPPORT_UNICODE +#if PCRE2_CODE_UNIT_WIDTH == 8 + if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu); +#else + if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu); +#endif +#endif /* SUPPORT_UNICODE */ + } + } + + +/* ==========================================================================*/ + +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +the loop runs just once. */ + +#ifdef SUPPORT_UNICODE +FRAGMENT_RESTART: +#endif + +start_partial = match_partial = NULL; +mb->hitend = FALSE; + +#if PCRE2_CODE_UNIT_WIDTH == 8 +memchr_found_first_cu = NULL; +memchr_found_first_cu2 = NULL; +#endif + +for(;;) + { + PCRE2_SPTR new_start_match; + + /* ----------------- Start of match optimizations ---------------- */ + + /* There are some optimizations that avoid running the match if a known + starting point is not found, or if a known later code unit is not present. + However, there is an option (settable at compile time) that disables these, + for testing and for ensuring that all callouts do actually occur. 
*/ + + if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0) + { + /* If firstline is TRUE, the start of the match is constrained to the first + line of a multiline string. That is, the match must be before or at the + first newline following the start of matching. Temporarily adjust + end_subject so that we stop the scans for a first code unit at a newline. + If the match fails at the newline, later code breaks the loop. */ + + if (firstline) + { + PCRE2_SPTR t = start_match; +#ifdef SUPPORT_UNICODE + if (utf) + { + while (t < end_subject && !IS_NEWLINE(t)) + { + t++; + ACROSSCHAR(t < end_subject, t, t++); + } + } + else +#endif + while (t < end_subject && !IS_NEWLINE(t)) t++; + end_subject = t; + } + + /* Anchored: check the first code unit if one is recorded. This may seem + pointless but it can help in detecting a no match case without scanning for + the required code unit. */ + + if (anchored) + { + if (has_first_cu || start_bits != NULL) + { + BOOL ok = start_match < end_subject; + if (ok) + { + PCRE2_UCHAR c = UCHAR21TEST(start_match); + ok = has_first_cu && (c == first_cu || c == first_cu2); + if (!ok && start_bits != NULL) + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + ok = (start_bits[c/8] & (1u << (c&7))) != 0; + } + } + if (!ok) + { + rc = MATCH_NOMATCH; + break; + } + } + } + + /* Not anchored. Advance to a unique first code unit if there is one. */ + + else + { + if (has_first_cu) + { + if (first_cu != first_cu2) /* Caseless */ + { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + +#if PCRE2_CODE_UNIT_WIDTH != 8 + PCRE2_UCHAR smc; + while (start_match < end_subject && + (smc = UCHAR21TEST(start_match)) != first_cu && + smc != first_cu2) + start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. 
Caching is used + to remember the positions of previously found code units. This can + make a huge difference when the strings are very long and only one + case is actually present. */ + + PCRE2_SPTR pp1 = NULL; + PCRE2_SPTR pp2 = NULL; + PCRE2_SIZE searchlength = end_subject - start_match; + + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ + + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) + { + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; + } + + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; + + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) + { + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; + } + + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + + if (pp1 == NULL) + start_match = (pp2 == NULL)? end_subject : pp2; + else + start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; + +#endif /* 8-bit handling */ + } + + /* The caseful case is much simpler. 
*/ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (start_match < end_subject && UCHAR21TEST(start_match) != + first_cu) + start_match++; +#else + start_match = memchr(start_match, first_cu, end_subject - start_match); + if (start_match == NULL) start_match = end_subject; +#endif + } + + /* If we can't find the required first code unit, having reached the + true end of the subject, break the bumpalong loop, to force a match + failure, except when doing partial matching, when we let the next cycle + run at the end of the subject. To see why, consider the pattern + /(?<=abc)def/, which partially matches "abc", even though the string + does not contain the starting character "d". If we have not reached the + true end of the subject (PCRE2_FIRSTLINE caused end_subject to be + temporarily modified) we also let the cycle run, because the matching + string is legitimately allowed to start with the first code unit of a + newline. */ + + if (mb->partial == 0 && start_match >= mb->end_subject) + { + rc = MATCH_NOMATCH; + break; + } + } + + /* If there's no first code unit, advance to just after a linebreak for a + multiline match if required. */ + + else if (startline) + { + if (start_match > mb->start_subject + start_offset) + { +#ifdef SUPPORT_UNICODE + if (utf) + { + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + { + start_match++; + ACROSSCHAR(start_match < end_subject, start_match, start_match++); + } + } + else +#endif + while (start_match < end_subject && !WAS_NEWLINE(start_match)) + start_match++; + + /* If we have just passed a CR and the newline option is ANY or + ANYCRLF, and we are now at a LF, advance the match position by one + more code unit. 
*/ + + if (start_match[-1] == CHAR_CR && + (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) && + start_match < end_subject && + UCHAR21TEST(start_match) == CHAR_NL) + start_match++; + } + } + + /* If there's no first code unit or a requirement for a multiline line + start, advance to a non-unique first code unit if any have been + identified. The bitmap contains only 256 bits. When code units are 16 or + 32 bits wide, all code units greater than 254 set the 255 bit. */ + + else if (start_bits != NULL) + { + while (start_match < end_subject) + { + uint32_t c = UCHAR21TEST(start_match); +#if PCRE2_CODE_UNIT_WIDTH != 8 + if (c > 255) c = 255; +#endif + if ((start_bits[c/8] & (1u << (c&7))) != 0) break; + start_match++; + } + + /* See comment above in first_cu checking about the next few lines. */ + + if (mb->partial == 0 && start_match >= mb->end_subject) + { + rc = MATCH_NOMATCH; + break; + } + } + } /* End first code unit handling */ + + /* Restore fudged end_subject */ + + end_subject = mb->end_subject; + + /* The following two optimizations must be disabled for partial matching. */ + + if (mb->partial == 0) + { + PCRE2_SPTR p; + + /* The minimum matching length is a lower bound; no string of that length + may actually match the pattern. Although the value is, strictly, in + characters, we treat it as code units to avoid spending too much time in + this optimization. */ + + if (end_subject - start_match < re->minlength) + { + rc = MATCH_NOMATCH; + break; + } + + /* If req_cu is set, we know that that code unit must appear in the + subject for the (non-partial) match to succeed. If the first code unit is + set, req_cu must be later in the subject; otherwise the test starts at + the match point. This optimization can save a huge amount of backtracking + in patterns with nested unlimited repeats that aren't going to match. + Writing separate code for caseful/caseless versions makes it go faster, + as does using an autoincrement and backing off on a match. 
As in the case + of the first code unit, using memchr() in the 8-bit library gives a big + speed up. Unlike the first_cu check above, we do not need to call + memchr() twice in the caseless case because we only need to check for the + presence of the character in either case, not find the first occurrence. + + The search can be skipped if the code unit was found later than the + current starting point in a previous iteration of the bumpalong loop. + + HOWEVER: when the subject string is very, very long, searching to its end + can take a long time, and give bad performance on quite ordinary + anchored patterns. This showed up when somebody was matching something + like /^\d+C/ on a 32-megabyte string... so we don't do this when the + string is sufficiently long, but it's worth searching a lot more for + unanchored patterns. */ + + p = start_match + (has_first_cu? 1:0); + if (has_req_cu && p > req_cu_ptr) + { + PCRE2_SIZE check_length = end_subject - start_match; + + if (check_length < REQ_CU_MAX || + (!anchored && check_length < REQ_CU_MAX * 1000)) + { + if (req_cu != req_cu2) /* Caseless */ + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + uint32_t pp = UCHAR21INCTEST(p); + if (pp == req_cu || pp == req_cu2) { p--; break; } + } +#else /* 8-bit code units */ + PCRE2_SPTR pp = p; + p = memchr(pp, req_cu, end_subject - pp); + if (p == NULL) + { + p = memchr(pp, req_cu2, end_subject - pp); + if (p == NULL) p = end_subject; + } +#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */ + } + + /* The caseful case */ + + else + { +#if PCRE2_CODE_UNIT_WIDTH != 8 + while (p < end_subject) + { + if (UCHAR21INCTEST(p) == req_cu) { p--; break; } + } + +#else /* 8-bit code units */ + p = memchr(p, req_cu, end_subject - p); + if (p == NULL) p = end_subject; +#endif + } + + /* If we can't find the required code unit, break the bumpalong loop, + forcing a match failure. 
*/ + + if (p >= end_subject) + { + rc = MATCH_NOMATCH; + break; + } + + /* If we have found the required code unit, save the point where we + found it, so that we don't search again next time round the bumpalong + loop if the start hasn't yet passed this code unit. */ + + req_cu_ptr = p; + } + } + } + } + + /* ------------ End of start of match optimizations ------------ */ + + /* Give no match if we have passed the bumpalong limit. */ + + if (start_match > bumpalong_limit) + { + rc = MATCH_NOMATCH; + break; + } + + /* OK, we can now run the match. If "hitend" is set afterwards, remember the + first starting point for which a partial match was found. */ + + cb.start_match = (PCRE2_SIZE)(start_match - subject); + cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH; + + mb->start_used_ptr = start_match; + mb->last_used_ptr = start_match; +#ifdef SUPPORT_UNICODE + mb->moptions = options | fragment_options; +#else + mb->moptions = options; +#endif + mb->match_call_count = 0; + mb->end_offset_top = 0; + mb->skip_arg_count = 0; + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Calling match()\n"); +#endif + + rc = match(start_match, mb->start_code, re->top_bracket, frame_size, + match_data, mb); + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ match() returned %d\n\n", rc); +#endif + + if (mb->hitend && start_partial == NULL) + { + start_partial = mb->start_used_ptr; + match_partial = start_match; + } + + switch(rc) + { + /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched + the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP + entirely. The only way we can do that is to re-do the match at the same + point, with a flag to force SKIP with an argument to be ignored. Just + treating this case as NOMATCH does not work because it does not check other + alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. 
*/ + + case MATCH_SKIP_ARG: + new_start_match = start_match; + mb->ignore_skip_arg = mb->skip_arg_count; + break; + + /* SKIP passes back the next starting point explicitly, but if it is no + greater than the match we have just done, treat it as NOMATCH. */ + + case MATCH_SKIP: + if (mb->verb_skip_ptr > start_match) + { + new_start_match = mb->verb_skip_ptr; + break; + } + /* Fall through */ + + /* NOMATCH and PRUNE advance by one character. THEN at this level acts + exactly like PRUNE. Unset ignore SKIP-with-argument. */ + + case MATCH_NOMATCH: + case MATCH_PRUNE: + case MATCH_THEN: + mb->ignore_skip_arg = 0; + new_start_match = start_match + 1; +#ifdef SUPPORT_UNICODE + if (utf) + ACROSSCHAR(new_start_match < end_subject, new_start_match, + new_start_match++); +#endif + break; + + /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */ + + case MATCH_COMMIT: + rc = MATCH_NOMATCH; + goto ENDLOOP; + + /* Any other return is either a match, or some kind of error. */ + + default: + goto ENDLOOP; + } + + /* Control reaches here for the various types of "no match at this point" + result. Reset the code to MATCH_NOMATCH for subsequent checking. */ + + rc = MATCH_NOMATCH; + + /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first + newline in the subject (though it may continue over the newline). Therefore, + if we have just failed to match, starting at a newline, do not continue. */ + + if (firstline && IS_NEWLINE(start_match)) break; + + /* Advance to new matching position */ + + start_match = new_start_match; + + /* Break the loop if the pattern is anchored or if we have passed the end of + the subject. */ + + if (anchored || start_match > end_subject) break; + + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF + or ANY or ANYCRLF, advance the match position by one more code unit. 
In + normal matching start_match will always be greater than the first position at + this stage, but a failed *SKIP can cause a return at the same point, which is + why the first test exists. */ + + if (start_match > subject + start_offset && + start_match[-1] == CHAR_CR && + start_match < end_subject && + *start_match == CHAR_NL && + (re->flags & PCRE2_HASCRORLF) == 0 && + (mb->nltype == NLTYPE_ANY || + mb->nltype == NLTYPE_ANYCRLF || + mb->nllen == 2)) + start_match++; + + mb->mark = NULL; /* Reset for start of next match attempt */ + } /* End of for(;;) "bumpalong" loop */ + +/* ==========================================================================*/ + +/* When we reach here, one of the following stopping conditions is true: + +(1) The match succeeded, either completely, or partially; + +(2) The pattern is anchored or the match was failed after (*COMMIT); + +(3) We are past the end of the subject or the bumpalong limit; + +(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because + this option requests that a match occur at or before the first newline in + the subject. + +(5) Some kind of error occurred. + +*/ + +ENDLOOP: + +/* If end_subject != true_end_subject, it means we are handling invalid UTF, +and have just processed a non-terminal fragment. If this resulted in no match +or a partial match we must carry on to the next fragment (a partial match is +returned to the caller only at the very end of the subject). A loop is used to +avoid trying to match against empty fragments; if the pattern can match an +empty string it would have done so already. */ + +#ifdef SUPPORT_UNICODE +if (utf && end_subject != true_end_subject && + (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL)) + { + for (;;) + { + /* Advance past the first bad code unit, and then skip invalid character + starting code units in 8-bit and 16-bit modes.
*/ + + start_match = end_subject + 1; + +#if PCRE2_CODE_UNIT_WIDTH != 32 + while (start_match < true_end_subject && NOT_FIRSTCU(*start_match)) + start_match++; +#endif + + /* If we have hit the end of the subject, there isn't another non-empty + fragment, so give up. */ + + if (start_match >= true_end_subject) + { + rc = MATCH_NOMATCH; /* In case it was partial */ + match_partial = NULL; + break; + } + + /* Check the rest of the subject */ + + mb->check_subject = start_match; + rc = PRIV(valid_utf)(start_match, length - (start_match - subject), + &(match_data->startchar)); + + /* The rest of the subject is valid UTF. */ + + if (rc == 0) + { + mb->end_subject = end_subject = true_end_subject; + fragment_options = PCRE2_NOTBOL; + goto FRAGMENT_RESTART; + } + + /* A subsequent UTF error has been found; if the next fragment is + non-empty, set up to process it. Otherwise, let the loop advance. */ + + else if (rc < 0) + { + mb->end_subject = end_subject = start_match + match_data->startchar; + if (end_subject > start_match) + { + fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL; + goto FRAGMENT_RESTART; + } + } + } + } +#endif /* SUPPORT_UNICODE */ + +/* Fill in fields that are always returned in the match data. */ + +match_data->code = re; +match_data->mark = mb->mark; +match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER; + +/* Handle a fully successful match. Set the return code to the number of +captured strings, or 0 if there were too many to fit into the ovector, and then +set the remaining returned values before returning. Make a copy of the subject +string if requested. */ + +if (rc == MATCH_MATCH) + { + match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)? + 0 : (int)mb->end_offset_top/2 + 1; + match_data->subject_length = length; + match_data->startchar = start_match - subject; + match_data->leftchar = mb->start_used_ptr - subject; + match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? 
+ mb->last_used_ptr : mb->end_match_ptr) - subject; + if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + memcpy((void *)match_data->subject, subject, length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } + else match_data->subject = subject; + + return match_data->rc; + } + +/* Control gets here if there has been a partial match, an error, or if the +overall match attempt has failed at all permitted starting positions. Any mark +data is in the nomatch_mark field. */ + +match_data->mark = mb->nomatch_mark; + +/* For anything other than nomatch or partial match, just return the code. */ + +if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc; + +/* Handle a partial match. If a "soft" partial match was requested, searching +for a complete match will have continued, and the value of rc at this point +will be MATCH_NOMATCH. For a "hard" partial match, it will already be +PCRE2_ERROR_PARTIAL. */ + +else if (match_partial != NULL) + { + match_data->subject = subject; + match_data->subject_length = length; + match_data->ovector[0] = match_partial - subject; + match_data->ovector[1] = end_subject - subject; + match_data->startchar = match_partial - subject; + match_data->leftchar = start_partial - subject; + match_data->rightchar = end_subject - subject; + match_data->rc = PCRE2_ERROR_PARTIAL; + } + +/* Else this is the classic nomatch case. */ + +else match_data->rc = PCRE2_ERROR_NOMATCH; + +return match_data->rc; +} + +/* These #undefs are here to enable unity builds with CMake. 
*/ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + +/* End of pcre2_match.c */ --- pcre2-10.45.orig/.pc/CVE-2025-58050.patch/testdata/testinput2 +++ pcre2-10.45/.pc/CVE-2025-58050.patch/testdata/testinput2 @@ -0,0 +1,7771 @@ +# This set of tests is not Perl-compatible. It checks on special features +# of PCRE2's API, error diagnostics, and the compiled code of some patterns. +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl +# bug. + +# NOTE: This is a non-UTF set of tests. When UTF support is needed, use +# test 5. + +#forbid_utf +#newline_default lf any anycrlf + +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex + a\x{0}b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex + a\x{0}b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex + DE + +/(a)b|/I + +/abc/I + abc + defabc + abc\=anchored +\= Expect no match + defabc\=anchored + ABC + +/^abc/I + abc + abc\=anchored +\= Expect no match + defabc + defabc\=anchored + +/a+bc/I + +/a*bc/I + +/a{3}bc/I + +/(abc|a+z)/I + +/^abc$/I + abc +\= Expect no match + def\nabc + +/ab\idef/ + +/(?X)ab\idef/ + +/x{5,4}/ + +/z{65536}/ + +/[abcd/ + +/[\B]/B + +/[\R]/B + +/[\X]/B + +/[z-a]/ + +/^*/ + +/(abc/ + +/(?# abc/ + +/(?z)abc/ + +/.*b/I + +/.*?b/I + +/cat|dog|elephant/I + this sentence eventually mentions a cat + this sentences rambles on and on for a while and then reaches elephant + +/cat|dog|elephant/I + this sentence eventually mentions a cat + this sentences rambles on and on for a while and then reaches elephant + +/cat|dog|elephant/Ii + this sentence eventually mentions a CAT cat + this sentences rambles on and on for a while to elephant ElePhant + 
+/a|[bcd]/I + +/(a|[^\dZ])/I + +/(a|b)*[\s]/I + +/(ab\2)/ + +/{4,5}abc/ + +/(a)(b)(c)\2/I + abcb + abcb\=ovector=0 + abcb\=ovector=1 + abcb\=ovector=2 + abcb\=ovector=3 + abcb\=ovector=4 + +/(a)bc|(a)(b)\2/I + abc + abc\=ovector=0 + abc\=ovector=1 + abc\=ovector=2 + aba + aba\=ovector=0 + aba\=ovector=1 + aba\=ovector=2 + aba\=ovector=3 + aba\=ovector=4 + +/abc$/I,dollar_endonly + abc +\= Expect no match + abc\n + abc\ndef + +/(a)(b)(c)(d)(e)\6/ + +/the quick brown fox/I + the quick brown fox + this is a line with the quick brown fox + +/the quick brown fox/I,anchored + the quick brown fox +\= Expect no match + this is a line with the quick brown fox + +/ab(?z)cd/ + +/^abc|def/I + abcdef + abcdef\=notbol + +/.*((abc)$|(def))/I + defabc + defabc\=noteol + +/)/ + +/a[]b/ + +/[^aeiou ]{3,}/I + co-processors, and for + +/<.*>/I + abcghinop + +/<.*?>/I + abcghinop + +/<.*>/I,ungreedy + abcghinop + +/(?U)<.*>/I + abcghinop + +/<.*?>/I,ungreedy + abcghinop + +/={3,}/I,ungreedy + abc========def + +/(?U)={3,}?/I + abc========def + +/(?^abc)/Im + abc + def\nabc +\= Expect no match + defabc + +/(?<=ab(c+)d)ef/ + +/(?<=ab(?<=c+)d)ef/ + +/The next three are in testinput2 because they have variable length branches/ + +/(?<=bullock|donkey)-cart/I + the bullock-cart + a donkey-cart race +\= Expect no match + cart + horse-and-cart + +/(?<=ab(?i)x|y|z)/I + +/(?>.*)(?<=(abcd)|(xyz))/I + alphabetabcd + endingxyz + +/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I + abxyZZ + abXyZZ + ZZZ + zZZ + bZZ + BZZ +\= Expect no match + ZZ + abXYZZ + zzz + bzz + +/(?[^()]+) # Either a sequence of non-brackets (no backtracking) + | # Or + (?R) # Recurse - i.e. 
nested bracketed string + )* # Zero or more contents + \) # Closing ) + /Ix + (abcd) + (abcd)xyz + xyz(abcd) + (ab(xy)cd)pqr + (ab(xycd)pqr + () abc () + 12(abcde(fsh)xyz(foo(bar))lmno)89 +\= Expect no match + abcd + abcd) + (abcd + +/\( ( (?>[^()]+) | (?R) )* \) /Igx + (ab(xy)cd)pqr + 1(abcd)(x(y)z)pqr + +/\( (?: (?>[^()]+) | (?R) ) \) /Ix + (abcd) + (ab(xy)cd) + (a(b(c)d)e) + ((ab)) +\= Expect no match + () + +/\( (?: (?>[^()]+) | (?R) )? \) /Ix + () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + +/\( ( (?>[^()]+) | (?R) )* \) /Ix + (ab(xy)cd) + +/\( ( ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + +/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + (123ab(xy)cd) + +/\( ( (123)? ( (?>[^()]+) | (?R) )* ) \) /Ix + (ab(xy)cd) + (123ab(xy)cd) + +/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /Ix + (ab(xy)cd) + +/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /Ix + (abcd(xyz

qrs)123) + +/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /Ix + (ab(cd)ef) + (ab(cd(ef)gh)ij) + +/^[[:alnum:]]/IB + +/^[[:^alnum:]]/IB + +/^[[:alpha:]]/IB + +/^[[:^alpha:]]/IB + +/[_[:alpha:]]/I + +/^[[:ascii:]]/IB + +/^[[:^ascii:]]/IB + +/^[[:blank:]]/IB + +/^[[:^blank:]]/IB + +/[\n\x0b\x0c\x0d[:blank:]]/I + +/^[[:cntrl:]]/IB + +/^[[:digit:]]/IB + +/^[[:graph:]]/IB + +/^[[:lower:]]/IB + +/^[[:print:]]/IB + +/^[[:punct:]]/IB + +/^[[:space:]]/IB + +/^[[:upper:]]/IB + +/^[[:xdigit:]]/IB + +/^[[:word:]]/IB + +/^[[:^cntrl:]]/IB + +/^[12[:^digit:]]/IB + +/^[[:^blank:]]/IB + +/[01[:alpha:]%]/IB + +/[[.ch.]]/I + +/[[=ch=]]/I + +/[[:rhubarb:]]/I + +/[[:upper:]]/Ii + A + a + +/[[:lower:]]/Ii + A + a + +/((?-i)[[:lower:]])[[:lower:]]/Ii + ab + aB +\= Expect no match + Ab + AB + +/[\200-\110]/I + +/^(?(0)f|b)oo/I + +# This one's here because of the large output vector needed + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$
))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s
|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 + +# This one's here because Perl does this differently and PCRE2 can't at present + +/(main(O)?)+/I + mainmain + mainOmain + +# These are all cases where Perl does it differently (nested captures) + +/^(a(b)?)+$/I + aba + +/^(aa(bb)?)+$/I + aabbaa + +/^(aa|aa(bb))+$/I + aabbaa + +/^(aa(bb)??)+$/I + aabbaa + +/^(?:aa(bb)?)+$/I + aabbaa + +/^(aa(b(b))?)+$/I + aabbaa + +/^(?:aa(b(b))?)+$/I + aabbaa + +/^(?:aa(b(?:b))?)+$/I + aabbaa + +/^(?:aa(bb(?:b))?)+$/I + aabbbaa + +/^(?:aa(b(?:bb))?)+$/I + aabbbaa + +/^(?:aa(?:b(b))?)+$/I + aabbaa + +/^(?:aa(?:b(bb))?)+$/I + aabbbaa + +/^(aa(b(bb))?)+$/I + aabbbaa + 
+/^(aa(bb(bb))?)+$/I + aabbbbaa + +# ---------------- + +/#/IBx + +/a#/IBx + +/[\s]/IB + +/[\S]/IB + +/a(?i)b/IB + ab + aB +\= Expect no match + AB + +/(a(?i)b)/IB + ab + aB +\= Expect no match + AB + +/ (?i)abc/IBx + +/#this is a comment + (?i)abc/IBx + +/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB + +/\Q123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB + +/\Q\E/IB + \ + +/\Q\Ex/IB + +/ \Q\E/IB + +/a\Q\E/IB + abc + bca + bac + +/a\Q\Eb/IB + abc + +/\Q\Eabc/IB + +/x*+\w/IB +\= Expect no match + xxxxx + +/x?+/IB + +/x++/IB + +# For comparison with the following test, which disables auto-possessification +# In this regex, x+ should be converted to x++ +/x+y/B,auto_possess + +# In this regex, x+ should not be converted to x++ +/x+y/B,auto_possess_off + +# Also in this regex, x+ should not be converted to x++ +/x+y/B,optimization_none + +# In this one too, x+ should not be converted to x++ +/x+y/B,no_auto_possess + +/x{1,3}+/B,no_auto_possess + +/x{1,3}+/Bi,no_auto_possess + +/[^x]{1,3}+/B,no_auto_possess + +/[^x]{1,3}+/Bi,no_auto_possess + +/x{1,3}+/IB,auto_possess_off + +/(x)*+/IB + +/^(\w++|\s++)*$/I + now is the time for all good men to come to the aid of the party +\= Expect no match + this is not a line with only words and spaces! 
+ +/(\d++)(\w)/I + 12345a +\= Expect no match + 12345+ + +/a++b/I + aaab + +/(a++b)/I + aaab + +/(a++)b/I + aaab + +/([^()]++|\([^()]*\))+/I + ((abc(ade)ufh()()x + +/\(([^()]++|\([^()]+\))+\)/I + (abc) + (abc(def)xyz) +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +/(abc){1,3}+/IB + +/a+?+/I + +/a{2,3}?+b/I + +/(?U)a+?+/I + +/a{2,3}?+b/I,ungreedy + +/x(?U)a++b/IB + xaaaab + +/(?U)xa++b/IB + xaaaab + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/IB + +/^x(?U)a+b/IB + +/^x(?U)(a+)b/IB + +/[.x.]/I + +/[=x=]/I + +/[:x:]/I + +/\F/I + +/\l/I + +/\L/I + +/\N{name}/I + +/\u/I + +/\U/I + +/\N{4}/ + abcdefg + +/\N{,}/ + +/\N{25,ab}/ + +/[\N]/ + +/[\N{4}]/ + +/[\N{name}]/ + +/a{1,3}b/ungreedy + ab + +/[/I + +/[a-/I + +/[[:space:]/I + +/[\s]/IB + +/[[:space:]]/IB + +/[[:space:]abcde]/IB + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix + <> + + hij> + hij> + def> + +\= Expect no match + iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB + +/\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB + +/(.*)\d+\1/I + +/(.*)\d+/I + +/(.*)\d+\1/Is + +/(.*)\d+/Is + +/(.*(xyz))\d+\2/I + +/((.*))\d+\1/I + 
abc123bc + +/a[b]/I + +/(?=a).*/I + +/(?=abc).xyz/Ii + +/(?=abc)(?i).xyz/I + +/(?=a)(?=b)/I + +/(?=.)a/I + +/((?=abcda)a)/I + +/((?=abcda)ab)/I + +/()a/I + +/(?:(?=.)|(?abc>([^()]|\((?1)*\))*abc>123abc>1(2)3abc>(1(2)3)]*+) | (?2)) * >))/Ix + <> + + hij> + hij> + def> + +\= Expect no match + b|c)d(?Pe)/IB + abde + acde + +/(?:a(?Pc(?Pd)))(?Pa)/IB + +/(?Pa)...(?P=a)bbb(?P>a)d/IB + +/^\W*(?:(?P(?P.)\W*(?P>one)\W*(?P=two)|)|(?P(?P.)\W*(?P>three)\W*(?P=four)|\W*.\W*))\W*$/Ii + 1221 + Satan, oscillate my metallic sonatas! + A man, a plan, a canal: Panama! + Able was I ere I saw Elba. +\= Expect no match + The quick brown fox + +/((?(R)a|b))\1(?1)?/I + bb + bbaa + +/(.*)a/Is + +/(.*)a\1/Is + +/(.*)a(b)\2/Is + +/((.*)a|(.*)b)z/Is + +/((.*)a|(.*)b)z\1/Is + +/((.*)a|(.*)b)z\2/Is + +/((.*)a|(.*)b)z\3/Is + +/((.*)a|^(.*)b)z\3/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is + +/(a)(bc)/IB,no_auto_capture + abc + +/(?Pa)(bc)/IB,no_auto_capture + abc + +/(a)(?Pbc)/IB,no_auto_capture + +/(aaa(?C1)bbb|ab)/I + aaabbb + aaabbb\=callout_data=0 + aaabbb\=callout_data=1 +\= Expect no match + aaabbb\=callout_data=-1 + +/ab(?Pcd)ef(?Pgh)/I + abcdefgh + abcdefgh\=copy=1,get=two + abcdefgh\=copy=one,copy=two + abcdefgh\=copy=three + +/(?P)(?P)/IB + +/(?P)(?P)/IB + +/(?Pzz)(?Paa)/I + zzaa\=copy=Z + zzaa\=copy=A + +/(?Peks)(?Peccs)/I + +/(?Pabc(?Pdef)(?Pxyz))/I + +"\[((?P\d+)(,(?P>elem))*)\]"I + [10,20,30,5,5,4,4,2,43,23,4234] +\= Expect no match + [] + +"\[((?P\d+)(,(?P>elem))*)?\]"I + [10,20,30,5,5,4,4,2,43,23,4234] + [] + +/(a(b(?2)c))?/IB + 
+/(a(b(?2)c))*/IB + +/(a(b(?2)c)){0,2}/IB + +/[ab]{1}+/B + +/()(?1){1}/B + +/()(?1)/B + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii + Baby Bjorn Active Carrier - With free SHIPPING!! + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii + Baby Bjorn Active Carrier - With free SHIPPING!! + +/a*.*b/IB + +/(a|b)*.?c/IB + +/abc(?C255)de(?C)f/IB + +/abcde/IB,auto_callout + abcde +\= Expect no match + abcdfe + +/a*b/IB,auto_callout + ab + aaaab + aaaacb + +/a*b/IB,auto_callout + ab + aaaab + aaaacb + +/a+b/IB,auto_callout + ab + aaaab +\= Expect no match + aaaacb + +/(abc|def)x/IB,auto_callout + abcx + defx +\= Expect no match + abcdefzx + +/(abc|def)x/IB,auto_callout + abcx + defx +\= Expect no match + abcdefzx + +/(ab|cd){3,4}/I,auto_callout + ababab + abcdabcd + abcdcdcdcdcd + +/([ab]{,}c|xy)/IB,auto_callout +\= Expect no match + Note: that {,} does NOT introduce a quantifier + +/([ab]{,}c|xy)/IB,auto_callout +\= Expect no match + Note: that {,} does NOT introduce a quantifier + +/([ab]{1,4}c|xy){4,5}?123/IB,auto_callout + aacaacaacaacaac123 + +/\b.*/I + ab cd\=offset=1 + +/\b.*/Is + ab cd\=startoffset=1 + +/(?!.bcd).*/I + Xbcd12345 + +/abcde/I + ab\=ps + abc\=ps + abcd\=ps + abcde\=ps + the quick brown abc\=ps +\= Expect no match\=ps + the quick brown abxyz fox\=ps + +"^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I + 13/05/04\=ps + 13/5/2004\=ps + 02/05/09\=ps + 1\=ps + 1/2\=ps + 1/2/0\=ps + 1/2/04\=ps + 0\=ps + 02/\=ps + 02/0\=ps + 02/1\=ps +\= Expect no match\=ps + \=ps + 123\=ps + 33/4/04\=ps + 3/13/04\=ps + 0/1/2003\=ps + 0/\=ps + 02/0/\=ps + 02/13\=ps + +/0{0,2}ABC/I + +/\d{3,}ABC/I + +/\d*ABC/I + +/[abc]+DE/I + +/[abc]?123/I + 123\=ps + a\=ps + b\=ps + c\=ps + c12\=ps + c123\=ps + +/^(?:\d){3,5}X/I + 1\=ps + 123\=ps + 123X + 1234\=ps + 1234X + 12345\=ps + 12345X +\= Expect no match + 1X + 123456\=ps + +"<(\w+)/?>(.)*"Igms + \n\n\nPartner der LCO\nde\nPartner der LINEAS Consulting\nGmbH\nLINEAS Consulting GmbH 
Hamburg\nPartnerfirmen\n30 days\nindex,follow\n\nja\n3\nPartner\n\n\nLCO\nLINEAS Consulting\n15.10.2003\n\n\n\n\nDie Partnerfirmen der LINEAS Consulting\nGmbH\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\=jitstack=1024 + +/line\nbreak/I + this is a line\nbreak + line one\nthis is a line\nbreak in the second line + +/line\nbreak/I,firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/line\nbreak/Im,firstline + this is a line\nbreak +\= Expect no match + line one\nthis is a line\nbreak in the second line + +/(?i)(?-i)AbCd/I + AbCd +\= Expect no match + abcd + +/a{11111111111111111111}/I + +/(){64294967295}/I + +/(){2,4294967295}/I + +"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I + abcdefghijklAkB + +"(?Pa)(?Pb)(?Pc)(?Pd)(?Pe)(?Pf)(?Pg)(?Ph)(?Pi)(?Pj)(?Pk)(?Pl)A\11B"I + abcdefghijklAkB + +"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"I + abcdefghijklAkB + +"(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)"I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"I + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 
+/[^()]*(?:\((?R)\)[^()]*)*/I + (this(and)that + (this(and)that) + (this(and)that)stuff + +/[^()]*(?:\((?>(?R))\)[^()]*)*/I + (this(and)that + (this(and)that) + +/[^()]*(?:\((?R)\))*[^()]*/I + (this(and)that + (this(and)that) + +/(?:\((?R)\))*[^()]*/I + (this(and)that + (this(and)that) + ((this)) + +/(?:\((?R)\))|[^()]*/I + (this(and)that + (this(and)that) + (this) + ((this)) + +/\x{0000ff}/I + +/^((?Pa1)|(?Pa2)b)/I + +/^((?Pa1)|(?Pa2)b)/I,dupnames + a1b\=copy=A + a2b\=copy=A + a1b\=copy=Z,copy=A + +/(?|(?)(?)(?)|(?)(?)(?))/I,dupnames + +/^(?Pa)(?Pb)/I,dupnames + ab\=copy=A + +/^(?Pa)(?Pb)|cd/I,dupnames + ab\=copy=A + cd\=copy=A + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames + cdefgh\=copy=A + +/^((?Pa1)|(?Pa2)b)/I,dupnames + a1b\=get=A + a2b\=get=A + a1b\=get=Z,get=A + +/^(?Pa)(?Pb)/I,dupnames + ab\=get=A + +/^(?Pa)(?Pb)|cd/I,dupnames + ab\=get=A + cd\=get=A + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames + cdefgh\=get=A + +/(?J)^((?Pa1)|(?Pa2)b)/I + a1b\=copy=A + a2b\=copy=A + +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I + +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. 
+ +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I + a bc d\=copy=A,copy=B,copy=C + +/^(?Pa)?(?(A)a|b)/I + aabc + bc +\= Expect no match + abc + +/(?:(?(ZZ)a|b)(?PX))+/I + bXaX + +/(?:(?(2y)a|b)(X))+/I + +/(?:(?(ZA)a|b)(?PX))+/I + +/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?PX))+/I + bbXaaX + +/(?:(?(ZZ)a|\(b\))\\(?PX))+/I + (b)\\Xa\\X + +/(?PX|Y))+/I + bXXaYYaY + bXYaXXaX + +/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?PX|Y))+/I + bXXaYYaY + +/\s*,\s*/I + \x0b,\x0b + \x0c,\x0d + +/^abc/Im,newline=lf + xyz\nabc + xyz\r\nabc +\= Expect no match + xyz\rabc + xyzabc\r + xyzabc\rpqr + xyzabc\r\n + xyzabc\r\npqr + +/^abc/Im,newline=crlf + xyz\r\nabclf> +\= Expect no match + xyz\nabclf + xyz\rabclf + +/^abc/Im,newline=cr + xyz\rabc +\= Expect no match + xyz\nabc + xyz\r\nabc + +/^abc/Im,newline=bad + +/.*/I,newline=lf + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=cr + abc\ndef + abc\rdef + abc\r\ndef + +/.*/I,newline=crlf + abc\ndef + abc\rdef + abc\r\ndef + +/\w+(.)(.)?def/Is + abc\ndef + abc\rdef + abc\r\ndef + +/(?P25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I + +/()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + (.(.))/Ix + XY\=ovector=133 + +/(a*b|(?i:c*(?-i)d))/I + +/()[ab]xyz/I + +/(|)[ab]xyz/I + +/(|c)[ab]xyz/I + +/(|c?)[ab]xyz/I + +/(d?|c?)[ab]xyz/I + +/(d?|c)[ab]xyz/I + +/^a*b\d/IB + +/^a*+b\d/IB + +/^a*?b\d/IB + +/^a+A\d/IB + aaaA5 +\= Expect no match + aaaa5 + +/^a*A\d/IBi + aaaA5 + aaaa5 + a5 + +/(a*|b*)[cd]/I + +/(a+|b*)[cd]/I + +/(a*|b+)[cd]/I + +/(a+|b+)[cd]/I + +/(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + ((( + a + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + 
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + ))) +/Ix + large nest + +/a*\d/B + +/a*\D/B + +/0*\d/B + +/0*\D/B + +/a*\s/B + +/a*\S/B + +/ *\s/B + +/ *\S/B + +/a*\w/B + +/a*\W/B + +/=*\w/B + +/=*\W/B + +/\d*a/B + +/\d*2/B + +/\d*\d/B + +/\d*\D/B + +/\d*\s/B + +/\d*\S/B + +/\d*\w/B + +/\d*\W/B + +/\D*a/B + +/\D*2/B + +/\D*\d/B + +/\D*\D/B + +/\D*\s/B + +/\D*\S/B + +/\D*\w/B + +/\D*\W/B + +/\s*a/B + +/\s*2/B + +/\s*\d/B + +/\s*\D/B + +/\s*\s/B + +/\s*\S/B + +/\s*\w/B + +/\s*\W/B + +/\S*a/B + +/\S*2/B + +/\S*\d/B + +/\S*\D/B + +/\S*\s/B + +/\S*\S/B + +/\S*\w/B + +/\S*\W/B + +/\w*a/B + +/\w*2/B + +/\w*\d/B + +/\w*\D/B + +/\w*\s/B + +/\w*\S/B + +/\w*\w/B + +/\w*\W/B + +/\W*a/B + +/\W*2/B + +/\W*\d/B + +/\W*\D/B + +/\W*\s/B + +/\W*\S/B + +/\W*\w/B + +/\W*\W/B + +/[^a]+a/B + +/[^a]+a/Bi + +/[^a]+A/Bi + +/[^a]+b/B + +/[^a]+\d/B + +/a*[^a]/B + +/(?Px)(?Py)/I + xy\=copy=abc,copy=xyz + +/(?x)(?'xyz'y)/I + xy\=copy=abc,copy=xyz + +/(?x)(?'xyz>y)/I + +/(?P'abc'x)(?Py)/I + +/^(?:(?(ZZ)a|b)(?X))+/ + bXaX + bXbX +\= Expect no match + aXaX + aXbX + +/^(?P>abc)(?xxx)/ + +/^(?P>abc)(?x|y)/ + xx + xy + yy + yx + +/^(?P>abc)(?Px|y)/ + xx + xy + yy + yx + +/^((?(abc)a|b)(?x|y))+/ + bxay + bxby +\= Expect no match + axby + +/^(((?P=abc)|X)(?x|y))+/ + XxXxxx + XxXyyx + XxXyxx +\= Expect no match + x + +/^(?1)(abc)/ + abcabc + +/^(?:(?:\1|X)(a|b))+/ + Xaaa + Xaba + +/^[\E\Qa\E-\Qz\E]+/B + +/^[a\Q]bc\E]/B + +/^[a-\Q\E]/B + +/^(?P>abc)[()](?)/B + +/^((?(abc)y)[()](?Px))+/B + (xy)x + +/^(?P>abc)\Q()\E(?)/B + +/^(?P>abc)[a\Q(]\E(](?)/B + +/^(?P>abc) # this is (a comment) + (?)/Bx + +/^\W*(?:(?(?.)\W*(?&one)\W*\k|)|(?(?.)\W*(?&three)\W*\k'four'|\W*.\W*))\W*$/Ii + 1221 + Satan, oscillate my metallic sonatas! + A man, a plan, a canal: Panama! + Able was I ere I saw Elba. 
+\= Expect no match + The quick brown fox + +/(?=(\w+))\1:/I + abcd: + +/(?=(?'abc'\w+))\k:/I + abcd: + +/(?'abc'a|b)(?d|e)\k{2}/dupnames + adaa +\= Expect no match + addd + adbb + +/(?'abc'a|b)(?d|e)(?&abc){2}/dupnames + bdaa + bdab +\= Expect no match + bddd + +/(?( (?'B' abc (?(R) (?(R&A)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + abcabc1Xabc2XabcXabcabc + +/(? (?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + +/^(?(DEFINE) abc | xyz ) /x + +/(?(DEFINE) abc) xyz/Ix + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0 +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 + +/^a.b/newline=lf + a\rb +\= Expect no match + a\nb + +/^a.b/newline=cr + a\nb +\= Expect no match + a\rb + +/^a.b/newline=anycrlf + a\x85b +\= Expect no match + a\rb + +/^a.b/newline=any +\= Expect no match + a\nb + a\rb + a\x85b + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9 + +/^a\Rb/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x85b + a\n\rb + a\n\r\x85\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode + a\nb + a\n\rb + a\n\r\x85b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/(?&abc)X(?P)/I + abcPXP123 + +/(?1)X(?P)/I + abcPXP123 + +/(?:a(?&abc)b)*(?x)/ + 123axbaxbaxbx456 + 123axbaxbaxb456 + +/(?:a(?&abc)b){1,5}(?x)/ + 123axbaxbaxbx456 + +/(?:a(?&abc)b){2,5}(?x)/ + 123axbaxbaxbx456 + +/(?:a(?&abc)b){2,}(?x)/ + 123axbaxbaxbx456 + +/(abc)(?i:(?1))/ + defabcabcxyz +\= Expect no match + DEFabcABCXYZ + +/(abc)(?:(?i)(?1))/ + defabcabcxyz +\= Expect no match + DEFabcABCXYZ + +/^(a)\g-2/ + 
+/^(a)\g/ + +/^(a)\g{0}/ + +/^(a)\g{3/ + +/^(a)\g{aa}/ + +/^a.b/newline=lf + a\rb +\= Expect no match + a\nb + +/.+foo/ + afoo +\= Expect no match + \r\nfoo + \nfoo + +/.+foo/newline=crlf + afoo + \nfoo +\= Expect no match + \r\nfoo + +/.+foo/newline=any + afoo +\= Expect no match + \nfoo + \r\nfoo + +/.+foo/s + afoo + \r\nfoo + \nfoo + +/^$/gm,newline=any + abc\r\rxyz + abc\n\rxyz +\= Expect no match + abc\r\nxyz + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + +/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + +/abc.$/gmx,newline=anycrlf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + +/^X/m + XABC +\= Expect no match + XABC\=notbol + +/(ab|c)(?-1)/B + abc + +/xy(?+1)(abc)/B + xyabcabc +\= Expect no match + xyabc + +/x(?-0)y/ + +/x(?-1)y/ + +/x(?+0)y/ + +/x(?+1)y/ + +/^(abc)?(?(-1)X|Y)/B + abcX + Y +\= Expect no match + abcY + +/^((?(+1)X|Y)(abc))+/B + YabcXabc + YabcXabcXabc +\= Expect no match + XabcXabc + +/(?(-1)a)/B + +/((?(-1)a))/B + +/((?(-2)a))/B + +/^(?(+1)X|Y)(.)/B + Y! 
+ +/(?tom|bon)-\k{A}/ + tom-tom + bon-bon +\= Expect no match + tom-bon + +/\g{A/ + +/(?|(abc)|(xyz))/B + >abc< + >xyz< + +/(x)(?|(abc)|(xyz))(x)/B + xabcx + xxyzx + +/(x)(?|(abc)(pqr)|(xyz))(x)/B + xabcpqrx + xxyzx + +/\H++X/B +\= Expect no match + XXXX + +/\H+\hY/B + XXXX Y + +/\H+ Y/B + +/\h+A/B + +/\v*B/B + +/\V+\x0a/B + +/A+\h/B + +/ *\H/B + +/A*\v/B + +/\x0b*\V/B + +/\d+\h/B + +/\d*\v/B + +/S+\h\S+\v/B + +/\w{3,}\h\w+\v/B + +/\h+\d\h+\w\h+\S\h+\H/B + +/\v+\d\v+\w\v+\S\v+\V/B + +/\H+\h\H+\d/B + +/\V+\v\V+\w/B + +/\( (?: [^()]* | (?R) )* \)/x +(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0
(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)
0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)\=jitstack=1024 + +/[\E]AAA/ + +/[\Q\E]AAA/ + +/[^\E]AAA/ + +/[^\Q\E]AAA/ + +/[\E^]AAA/ + +/[\Q\E^]AAA/ + +/A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B + +/^a+(*FAIL)/auto_callout +\= Expect no match + aaaaaa + +/a+b?c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match + aaabcccaaabccc + +/a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc + +/a(*MARK)b/ + +/\g6666666666/ + +/[\g6666666666]/B + +/(?1)\c[/ + +/.+A/newline=crlf +\= Expect no match + \r\nA + +/\nA/newline=crlf + \r\nA + +/[\r\n]A/newline=crlf + \r\nA + +/(\r|\n)A/newline=crlf + \r\nA + +/a(*CR)b/ + +/(*CR)a.b/ + a\nb +\= Expect no match + a\rb + +/(*CR)a.b/newline=lf + a\nb +\= Expect no match + a\rb + +/(*LF)a.b/newline=CRLF + a\rb +\= Expect no match + a\nb + +/(*CRLF)a.b/ + a\rb + a\nb +\= Expect no match + a\r\nb + +/(*ANYCRLF)a.b/newline=CR +\= Expect no match + a\rb + a\nb + a\r\nb + +/(*ANY)a.b/newline=cr +\= Expect no match + a\rb + a\nb + a\r\nb + a\x85b + +/(*ANY).*/g + abc\r\ndef + +/(*ANYCRLF).*/g + abc\r\ndef + +/(*CRLF).*/g + abc\r\ndef + +/(*NUL)^.*/ + a\nb\x00ccc + +/(*NUL)^.*/s + a\nb\x00ccc + +/^x/m,newline=NUL + ab\x00xy + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + +/(*NUL)^X\NY/ + 
X\nY + X\rY +\= Expect no match + X\x00Y + +/a\Rb/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\Rb/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R?b/I,bsr=anycrlf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x85b + a\x0bb + +/a\R?b/I,bsr=unicode + a\rb + a\nb + a\r\nb + a\x85b + a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf + a\r\n\nb + a\n\r\rb + a\r\n\r\n\r\n\r\nb +\= Expect no match + a\x85\x85b + a\x0b\x0bb + +/a\R{2,4}b/I,bsr=unicode + a\r\rb + a\n\n\nb + a\r\n\n\r\rb + a\x85\x85b + a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb + +/(*BSR_ANYCRLF)a\Rb/I + a\nb + a\rb + +/(*BSR_UNICODE)a\Rb/I + a\x85b + +/(*BSR_ANYCRLF)(*CRLF)a\Rb/I + a\nb + a\rb + +/(*CRLF)(*BSR_UNICODE)a\Rb/I + a\x85b + +/(*CRLF)(*BSR_ANYCRLF)(*CR)ab/I + +/(?)(?&)/ + +/(?)(?&a)/ + +/(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ + +/(?+-a)/ + +/(?-+a)/ + +/(?(-1))/ + +/(?(+10))/ + +/(?(10))/ + +/(?(+2))()()/ + +/(?(2))()()/ + +/\k''/ + +/\k<>/ + +/\k{}/ + +/\k/ + +/\kabc/ + +/(?P=)/ + +/(?P>)/ + +/[[:foo:]]/ + +/[[:1234:]]/ + +/[[:f\oo:]]/ + +/[[: :]]/ + +/[[:...:]]/ + +/[[:l\ower:]]/ + +/[[:abc\:]]/ + +/[abc[:x\]pqr:]]/ + +/[[:a\dz:]]/ + +/(^(a|b\g<-1'c))/ + +/^(?+1)(?x|y){0}z/ + xzxx + yzyy +\= Expect no match + xxz + +/(\3)(\1)(a)/ +\= Expect no match + cat + +/cat[]/B,allow_empty_class + cat\=ph + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + +/TA]/ + The ACTA] comes + +/TA]/allow_empty_class,match_unset_backref,dupnames + The ACTA] comes + +/(?2)[]a()b](abc)/ + abcbabc + +/(?2)[^]a()b](abc)/ + abcbabc + +/(?1)[]a()b](abc)/ + abcbabc +\= Expect no match + abcXabc + +/(?1)[^]a()b](abc)/ + abcXabc +\= Expect no match + abcbabc + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + +/(?&N)[]a(?)](?abc)/ + abc)](abc)/ + abcY)/ + XYabcdY + +/Xa{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\d{2,4}b/ + X\=ps + X3\=ps 
+ X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}?b/ + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}+b/ + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\D{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}?b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}+b/ + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[^a]{2,4}b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}?b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}+b/ + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/(Y)X\1{2,4}b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}?b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}+b/ + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/\++\KZ|\d+X|9+Y/startchar + ++++123999\=ps + ++++123999Y\=ps + ++++Z1234\=ps + +/Z(*F)/ +\= Expect no match + Z\=ps + ZA\=ps + +/Z(?!)/ +\= Expect no match + Z\=ps + ZA\=ps + +/dog(sbody)?/ + dogs\=ps + dogs\=ph + +/dog(sbody)??/ + dogs\=ps + dogs\=ph + +/dog|dogsbody/ + dogs\=ps + dogs\=ph + +/dogsbody|dog/ + dogs\=ps + dogs\=ph + +/\bthe cat\b/ + the cat\=ps + the cat\=ph + +/abc/ + abc\=ps + abc\=ph + +/abc\K123/startchar + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/(?<=abc)123/ + xyzabc123pqr + xyzabc12\=ps + xyzabc12\=ph + +/\babc\b/ + +++abc+++ + +++ab\=ps + +++ab\=ph + +/(?&word)(?&element)(?(DEFINE)(?<[^m][^>]>[^<])(?\w*+))/B + +/(?&word)(?&element)(?(DEFINE)(?<[^\d][^>]>[^<])(?\w*+))/B + +/(ab)(x(y)z(cd(*ACCEPT)))pq/B + +/abc\K/aftertext,startchar + abcdef + abcdef\=notempty_atstart + xyzabcdef\=notempty_atstart +\= Expect no match + abcdef\=notempty + xyzabcdef\=notempty + +/^(?:(?=abc)|abc\K)/aftertext,startchar + abcdef + abcdef\=notempty_atstart +\= Expect no match + 
abcdef\=notempty + +/a?b?/aftertext + xyz + xyzabc + xyzabc\=notempty + xyzabc\=notempty_atstart + xyz\=notempty_atstart +\= Expect no match + xyz\=notempty + +/^a?b?/aftertext + xyz + xyzabc +\= Expect no match + xyzabc\=notempty + xyzabc\=notempty_atstart + xyz\=notempty_atstart + xyz\=notempty + +/^(?a|b\gc)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(?a|b\g'name'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g<1>c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g'1'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/^(a|b\g'-1'c)/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/(^(a|b\g<-1>c))/ + aaaa + bacxxx + bbaccxxx + bbbacccxx + +/(?-i:\g)(?i:(?a))/ + XaaX + XAAX + +/(?i:\g)(?-i:(?a))/ + XaaX +\= Expect no match + XAAX + +/(?-i:\g<+1>)(?i:(a))/ + XaaX + XAAX + +/(?=(?(?#simplesyntax)\$(?[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*)(?:\[(?[a-zA-Z0-9_\x{7f}-\x{ff}]+|\$\g)\]|->\g(\(.*?\))?)?|(?#simple syntax withbraces)\$\{(?:\g(?\[(?:\g|'(?:\\.|[^'\\])*'|"(?:\g|\\.|[^"\\])*")\])?|\g|\$\{\g\})\}|(?#complexsyntax)\{(?\$(?\g(\g*|\(.*?\))?)(?:->\g)*|\$\g|\$\{\g\})\}))\{/ + +/(?a|b|c)\g*/ + abc + accccbbb + +/^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/ + XYabcdY + +/(?<=b(?1)|zzz)(a)/ + xbaax + xzzzax + +/(a)(?<=b\1)/ + +/(a)(?<=b+(?1))/ + +/(a+)(?<=b(?1))/ + +/(a(?<=b(?1)))/ + +/(?<=b(?1))xyz/ + +/(?<=b(?1))xyz(b+)pqrstuvew/ + +/(a|bc)\1/I + +/(a|bc)\1{2,3}/I + +/(a|bc)(?1)/I + +/(a|b\1)(a|b\1)/I + +/(a|b\1){2}/I + +/(a|bbbb\1)(a|bbbb\1)/I + +/(a|bbbb\1){2}/I + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/Iis + +"(?>.*/)foo"I + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /Ix + +/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii + +/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I + +/A)|(?
B))/I + AB\=copy=a + BA\=copy=a + +/(?|(?A)|(?B))/ + +/(?:a(? (?')|(?")) | + b(? (?')|(?")) ) + (?('quote')[a-z]+|[0-9]+)/Ix,dupnames + a"aaaaa + b"aaaaa +\= Expect no match + b"11111 + a"11111 + +/(?:a(?[0-5])|b(?[4-7]))c(?()d|e)/B,dupnames + a4cd + b4cd +\= Expect no match + a6cd + a6ce + +/^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames + abcdX + eX +\= Expect no match + abcdY + ey + +/(?a) (b)(c) (?d (?(R&A)$ | (?4)) )/IBx,dupnames + abcdd +\= Expect no match + abcdde + +/abcd*/ + xxxxabcd\=ps + xxxxabcd\=ph + +/abcd*/i + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/ + xxxxabc1\=ps + xxxxabc1\=ph + +/(a)bc\1*/ + xxxxabca\=ps + xxxxabca\=ph + +/abc[de]*/ + xxxxabcde\=ps + xxxxabcde\=ph + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + +/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames + cat + +/(\3)(\1)(a)/I +\= Expect no match + cat + +/i(?(DEFINE)(?a))/I + i + +/()i(?(1)a)/I + ia + +/(?i)a(?-i)b|c/B + XabX + XAbX + CcC +\= Expect no match + XABX + +/(?i)a(?s)b|c/B + +/(?i)a(?s-i)b|c/B + +/^(ab(c\1)d|x){2}$/B + xabcxd + +/^(?&t)*+(?(DEFINE)(?.))$/B + +/^(?&t)*(?(DEFINE)(?.))$/B + +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. + +/(?>a\Kb)z|(ab)/ + ab\=startchar + +/(?P(?P0|)|(?P>L2)(?P>L1))/ + abcd + 0abc + +/abc(*MARK:)pqr/ + +/abc(*:)pqr/ + +/(*COMMIT:X)/B + +# This should, and does, fail. In Perl, it does not, which I think is a +# bug because replacing the B in the pattern by (B|D) does make it fail. +# Turning off Perl's optimization by inserting (??{""}) also makes it fail. + +/A(*COMMIT)B/aftertext,mark +\= Expect no match + ACABX + +# These should be different, but in Perl they are not, which I think +# is a bug in Perl. + +/A(*THEN)B|A(*THEN)C/mark + AC + +/A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match + AC + +# Mark names can be duplicated. Perl doesn't give a mark for this one, +# though PCRE2 does. 
+ +/^A(*:A)B|^X(*:A)Y/mark +\= Expect no match + XAQQ + +# COMMIT at the start of a pattern should be the same as an anchor. Perl +# optimizations defeat this. So does the PCRE2 optimization unless we disable +# it. + +/(*COMMIT)ABC/ + ABCDEFG + +/(*COMMIT)ABC/no_start_optimize +\= Expect no match + DEFGABC + +/^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match + abcccd + +/^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match + abcccd + +/^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match + abcccd + +# Perl gets some of these wrong + +/(?>.(*ACCEPT))*?5/ + abcde + +/(.(*ACCEPT))*?5/ + abcde + +/(.(*ACCEPT))5/ + abcde + +/(.(*ACCEPT))*5/ + abcde + +/A\NB./B + ACBD +\= Expect no match + A\nB + ACB\n + +/A\NB./Bs + ACBD + ACB\n +\= Expect no match + A\nB + +/A\NB/newline=crlf + A\nB + A\rB +\= Expect no match + A\r\nB + +/\R+b/B + +/\R+\n/B + +/\R+\d/B + +/\d*\R/B + +/\s*\R/B + \x20\x0a + \x20\x0d + \x20\x0d\x0a + +/\S*\R/B + a\x0a + +/X\h*\R/B + X\x20\x0a + +/X\H*\R/B + X\x0d\x0a + +/X\H+\R/B + X\x0d\x0a + +/X\H++\R/B +\= Expect no match + X\x0d\x0a + +/(?<=abc)def/ + abc\=ph + +/abc$/ + abc + abc\=ps + abc\=ph + +/abc$/m + abc + abc\n + abc\=ph + abc\n\=ph + abc\=ps + abc\n\=ps + +/abc\z/ + abc + abc\=ps + abc\=ph + +/abc\Z/ + abc + abc\=ps + abc\=ph + +/abc\b/ + abc + abc\=ps + abc\=ph + +/abc\B/ + abc\=ps + abc\=ph +\= Expect no match + abc + +/.+/ +\= Bad offsets + abc\=offset=4 + abc\=offset=-4 +\= Valid data + abc\=offset=0 + abc\=offset=1 + abc\=offset=2 +\= Expect no match + abc\=offset=3 + +/^\cÄ£/ + +/(?P(?P=abn)xxx)/B + +/(a\1z)/B + +/(?P(?P=abn)(?(?P=axn)xxx)/B + +/(?P(?P=axn)xxx)(?yy)/B + +# These tests are here because Perl gets the first one wrong. 
+ +/(\R*)(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/(\R)*(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +# ------------- + +/^abc$/B + +/^abc$/Bm + +/^(a)*+(\w)/ + aaaaX +\= Expect no match + aaaa + +/^(?:a)*+(\w)/ + aaaaX +\= Expect no match + aaaa + +/(a)++1234/IB + +/([abc])++1234/I + +/(?<=(abc)+)X/ + +/(^ab)/I + +/(^ab)++/I + +/(^ab|^)+/I + +/(^ab|^)++/I + +/(?:^ab)/I + +/(?:^ab)++/I + +/(?:^ab|^)+/I + +/(?:^ab|^)++/I + +/(.*ab)/I + +/(.*ab)++/I + +/(.*ab|.*)+/I + +/(.*ab|.*)++/I + +/(?:.*ab)/I + +/(?:.*ab)++/I + +/(?:.*ab|.*)+/I + +/(?:.*ab|.*)++/I + +/(?=a)[bcd]/I + +/((?=a))[bcd]/I + +/((?=a))+[bcd]/I + +/((?=a))++[bcd]/I + +/(?=a+)[bcd]/Ii + +/(?=a+?)[bcd]/Ii + +/(?=a++)[bcd]/Ii + +/(?=a{3})[bcd]/Ii + +/(abc)\1+/ + +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) + +/(?1)(?:(b(*ACCEPT))){0}/ + b + +/(?1)(?:(b(*ACCEPT))){0}c/ + bc +\= Expect no match + b + +/(?1)(?:((*ACCEPT))){0}c/ + c + c\=notempty + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba + +/^.*?(?(?=a)a|bc)/ + ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac + +/^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match + ac + +/^.*?(a(*THEN)b)c/ +\= Expect no match + aabc + +/^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x + aabc + +/^.*?(a(*THEN)b|z)c/ + aabc + +/^.*?(z|a(*THEN)b)c/ + aabc + +# These are here because they are not Perl-compatible; the studying means the +# mark is not seen. 
+ +/(*MARK:A)(*SKIP:B)(C|X)/mark + C +\= Expect no match + D + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC + +# ---------------------------- + +"(?=a*(*ACCEPT)b)c" + c + c\=notempty + +/(?1)c(?(DEFINE)((*ACCEPT)b))/ + c + c\=notempty + +/(?>(*ACCEPT)b)c/ + c +\= Expect no match + c\=notempty + +/(?:(?>(a)))+a%/allaftertext + %aa% + +/(a)b|ac/allaftertext + ac\=ovector=1 + +/(a)(b)x|abc/allaftertext + abc\=ovector=2 + +/(a)bc|(a)(b)\2/ + abc\=ovector=1 + abc\=ovector=2 + aba\=ovector=1 + aba\=ovector=2 + aba\=ovector=3 + aba\=ovector=4 + +/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I + +/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I + +/(a(?2)|b)(b(?1)|a)(?1)(?2)/I + +/(abc)(?1)/I + +/(?:(foo)|(bar)|(baz))X/allcaptures + bazfooX + foobazbarX + barfooX + bazX + foobarbazX + bazfooX\=ovector=0 + bazfooX\=ovector=1 + bazfooX\=ovector=2 + bazfooX\=ovector=3 + +/(?=abc){3}abc/B + +/(?=abc)+abc/B + +/(?=abc)++abc/B + +/(?=abc){0}xyz/B + +/(?=(a))?./B + +/(?=(a))??./B + +/^(?=(a)){0}b(?1)/B + +/(?(DEFINE)(a))?b(?1)/B + +/^(?=(?1))?[az]([abc])d/B + +/^(?!a){0}\w+/B + +/(?<=(abc))?xyz/B + +/[:a[:abc]b:]/B + +/^(a(*:A)(d|e(*:B))z|aeq)/auto_callout + adz + aez + aeqwerty + +/.(*F)/ +\= Expect no match + abc\=ph + +/\btype\b\W*?\btext\b\W*?\bjavascript\b/I + +/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|a+)(?>(z+))\w/B + aaaazzzzb +\= Expect no match + aazz + +/(.)(\1|a(?2))/ + bab + +/\1|(.)(?R)\1/ + cbbbc + +/(.)((?(1)c|a)|a(?2))/ +\= Expect no match + baa + +/(?P(?P=abn)xxx)/B + +/(a\1z)/B + +/^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz +\= Expect no match + ax41z + +/^a[m\x41]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + +/^a\x1z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + ax1z + +/^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz +\= Expect no match + au0041z + +/^a[m\u0041]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 
+/^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + au041z +\= Expect no match + aAz + +/^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aU0041z +\= Expect no match + aAz + +/^\u{7a}/alt_bsux + u{7a} +\= Expect no match + zoo + +/^\u{7a}/extra_alt_bsux + zoo + +/\u{}/extra_alt_bsux + u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + +/(?(?=c)c|d)++Y/B + +/(?(?=c)c|d)*+Y/B + +/a[\NB]c/ + aNc + +/a[B-\Nc]/ + +/a[B\Nc]/ + +/(a)(?2){0,1999}?(b)/ + +/(a)(?(DEFINE)(b))(?2){0,1999}?(?2)/ + +# This test, with something more complicated than individual letters, causes +# different behaviour in Perl. Perhaps it disables some optimization; no tag is +# passed back for the failures, whereas in PCRE2 there is a tag. + +/(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark + AABC + XXYZ +\= Expect no match + XAQQ + XAQQXZZ + AXQQQ + AXXQQQ + +# Perl doesn't give marks for these, though it does if the alternatives are +# replaced by single letters. + +/(b|q)(*:m)f|a(*:n)w/mark + aw +\= Expect no match + abc + +/(q|b)(*:m)f|a(*:n)w/mark + aw +\= Expect no match + abc + +# After a partial match, the behaviour is as for a failure. + +/^a(*:X)bcde/mark + abc\=ps + +# These are here because Perl doesn't return a mark, except for the first. 
+ +/(?=(*:x))(q|)/aftertext,mark + abc + +/(?=(*:x))((*:y)q|)/aftertext,mark + abc + +/(?=(*:x))(?:(*:y)q|)/aftertext,mark + abc + +/(?=(*:x))(?>(*:y)q|)/aftertext,mark + abc + +/(?=a(*:x))(?!a(*:y)c)/aftertext,mark + ab + +/(?=a(*:x))(?=a(*:y)c|)/aftertext,mark + ab + +/(..)\1/ + ab\=ps + aba\=ps + abab\=ps + +/(..)\1/i + ab\=ps + abA\=ps + aBAb\=ps + +/(..)\1{2,}/ + ab\=ps + aba\=ps + abab\=ps + ababa\=ps + ababab\=ps + ababab\=ph + abababa\=ps + abababa\=ph + +/(..)\1{2,}/i + ab\=ps + aBa\=ps + aBAb\=ps + AbaBA\=ps + abABAb\=ps + aBAbaB\=ph + abABabA\=ps + abaBABa\=ph + +/(..)\1{2,}?x/i + ab\=ps + abA\=ps + aBAb\=ps + abaBA\=ps + abAbaB\=ps + abaBabA\=ps + abAbABaBx\=ps + +/^(..)\1/ + aba\=ps + +/^(..)\1{2,3}x/ + aba\=ps + ababa\=ps + ababa\=ph + abababx + ababababx + +/^(..)\1{2,3}?x/ + aba\=ps + ababa\=ps + ababa\=ph + abababx + ababababx + +/^(..)(\1{2,3})ab/ + abababab + +/^\R/ + \r\=ps + \r\=ph + +/^\R{2,3}x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R{2,3}?x/ + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + \r\rx + \r\r\rx + +/^\R?x/ + \r\=ps + \r\=ph + x + \rx + +/^\R+x/ + \r\=ps + \r\=ph + \r\n\=ps + \r\n\=ph + \rx + +/^a$/newline=crlf + a\r\=ps + a\r\=ph + +/^a$/m,newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/newline=crlf + a\r\=ps + a\r\=ph + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + a\r\=ph + +/./newline=crlf + \r\=ps + \r\=ph + +/.{2,3}/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/.{2,3}?/newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +"AB(C(D))(E(F))?(?(?=\2)(?=\4))" + ABCDGHI\=ovector=01 + +# These are all run as real matches in test 1; here we are just checking the +# settings of the anchored and startline bits. 
+ +/(?>.*?a)(?<=ba)/I + +/(?:.*?a)(?<=ba)/I + +/.*?a(*PRUNE)b/I + +/.*?a(*PRUNE)b/Is + +/^a(*PRUNE)b/Is + +/.*?a(*SKIP)b/I + +/(?>.*?a)b/Is + +/(?>.*?a)b/I + +/(?>^a)b/Is + +/(?>.*?)(?<=(abcd)|(wxyz))/I + +/(?>.*)(?<=(abcd)|(wxyz))/I + +"(?>.*)foo"I + +"(?>.*?)foo"I + +/(?>^abc)/Im + +/(?>.*abc)/Im + +/(?:.*abc)/Im + +/(?:(a)+(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(a)++(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(?>(a))(?C1)bb|aa(?C2)b)/ + aab\=callout_capture + +/(?:(?1)(?C1)x|ab(?C2))((a)){0}/ + aab\=callout_capture + +/(?1)(?C1)((a)(?C2)){0}/ + aab\=callout_capture + +/(?:(a)+(?C1)bb|aa(?C2)b)++/ + aab\=callout_capture + aab\=callout_capture,ovector=1 + +/(ab)x|ab/ + ab\=ovector=0 + ab\=ovector=1 + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph + xxxx123a\=ps + +/123\Kabc/startchar + xxxx123a\=ph + xxxx123a\=ps + +/^(?(?=a)aa|bb)/auto_callout + bb + +/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/ + bb + +# Perl seems to have a bug with this one. + +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + aaaaaac + +# Here are some that Perl treats differently because of the way it handles +# backtracking verbs. + +/(?!a(*COMMIT)b)ac|ad/ + ac + ad + +/^(?!a(*THEN)b|ac)../ + ad +\= Expect no match + ac + +/^(?=a(*THEN)b|ac)/ + ac + +/\A.*?(?:a|b(*THEN)c)/ + ba + +/\A.*?(?:a|b(*THEN)c)++/ + ba + +/\A.*?(?:a|b(*THEN)c|d)/ + ba + +/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ + aac + +/\A.*?(a|b(*THEN)c)/ + ba + +/^(A(*THEN)B|A(*THEN)D)/ + AD + +/(?!b(*THEN)a)bn|bnn/ + bnn + +/(?(?=b(*SKIP)a)bn|bnn)/ + bnn + +/(?=b(*THEN)a|)bn|bnn/ + bnn + +# This test causes a segfault with Perl 5.18.0 + +/^(?=(a)){0}b(?1)/ + backgammon + +/(?|(?f)|(?b))/I,dupnames + +/(?abc)(?z)\k()/IB,dupnames + +/a*[bcd]/B + +/[bcd]*a/B + +# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). + +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. 
\D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx + +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx + +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx + +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx + +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. \W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx + +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx + +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx + +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx + +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. \h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx + +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx + +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx + +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx + +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. \n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. 
.+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx + +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx + +/(?=a+)a(a+)++a/B + +/a+(bb|cc)a+(?:bb|cc)a+(?>bb|cc)a+(?:bb|cc)+a+(aa)a+(?:bb|aa)/B + +/a+(bb|cc)?#a+(?:bb|cc)??#a+(?:bb|cc)?+#a+(?:bb|cc)*#a+(bb|cc)?a#a+(?:aa)?/B + +/a+(?:bb)?a#a+(?:|||)#a+(?:|b)a#a+(?:|||)?a/B + +/[ab]*/B + aaaa + +/[ab]*?/B + aaaa + +/[ab]?/B + aaaa + +/[ab]??/B + aaaa + +/[ab]+/B + aaaa + +/[ab]+?/B + aaaa + +/[ab]{2,3}/B + aaaa + +/[ab]{2,3}?/B + aaaa + +/[ab]{2,}/B + aaaa + +/[ab]{2,}?/B + aaaa + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B + +/[a-d]{5,12}[e-z0-9]*#[^a-z]+[b-y]*a[2-7]?[^0-9a-z]+/B + +/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/B + +/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/B + +/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/B + +/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/B + +/[a-c]*d/IB + +/[a-c]+d/IB + +/[a-c]?d/IB + +/[a-c]{4,6}d/IB + +/[a-c]{0,6}d/IB + +# End of special auto-possessive tests + +/^A\o{1239}B/ + A\123B + +/^A\oB/ + +/^A\x{zz}B/ + +/^A\x{12Z/ + +/^A\x{/ + +/[ab]++/B,no_auto_possess + +/[^ab]*+/B,no_auto_possess + +/a{4}+/B,no_auto_possess + +/a{4}+/Bi,no_auto_possess + +/[a-[:digit:]]+/ + +/[A-[:digit:]]+/ + +/[a-[.xxx.]]+/ + +/[a-[=xxx=]]+/ + +/[a-[!xxx!]]+/ + +/[A-[!xxx!]]+/ + A]]] + +/[a-\d]+/ + +/(?<0abc>xx)/ + +/(?&1abc)xx(?<1abc>y)/ + +/(?xx)/ + +/(?'0abc'xx)/ + +/(?P<0abc>xx)/ + +/\k<5ghj>/ + +/\k'5ghj'/ + +/\k{2fgh}/ + +/(?P=8yuki)/ + +/\g{4df}/ + +/(?&1abc)xx(?<1abc>y)/ + +/(?P>1abc)xx(?<1abc>y)/ + +/\g'3gh'/ + +/\g<5fg>/ + +/(?(<4gh>)abc)/ + +/(?('4gh')abc)/ + +/(?(4gh)abc)/ + +/(?(R&6yh)abc)/ + +/(((a\2)|(a*)\g<-1>))*a?/B + +# Test the ugly "start or end of word" compatibility syntax. 
+ +/[[:<:]]red[[:>:]]/B + little red riding hood + a /red/ thing + red is a colour + put it all on red +\= Expect no match + no reduction + Alfred Winifred + +/[[:<:]]+red/B + little red riding hood + red is a colour +\= Expect no match + Alfred + +/[a[:<:]] should give error/ + +/(?=ab\K)/aftertext,allow_lookaround_bsk + abcd\=startchar + +/abcd/newline=lf,firstline +\= Expect no match + xx\nxabcd + +# Test stack guard external calls. + +/(((a)))/stackguard=1 + +/(((a)))/stackguard=2 + +/(((a)))/stackguard=3 + +/(((((a)))))/ + +# End stack guard tests + +/^\w+(?>\s*)(?<=\w)/B + +/\othing/ + +/\o{}/ + +/\o{whatever}/ + +/\xthing/ + +/^A\xZ/ + +/^A\x/ + +/\x{}/ + +/\x{whatever}/ + +/A\8B/ + +/A\9B/ + +# This one is here because Perl fails to match "12" for this pattern when the $ +# is present. + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 12 +\= Expect no match + 123 + xyz + +# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# match for cd. + +/(?(?=ab)ab)/aftertext + abxxx + ca + cd + +# This should test both paths for processing OP_RECURSE. 
+ +/(?(R)a+|(?R)b)/ + aaaabcde + aaaabcde\=ovector=100 + +/a*?b*?/ + ab + +/(*NOTEMPTY)a*?b*?/ + ab + ba + cb + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + cdab + +/(?(VERSION>=10.0)yes|no)/I + yesno + +/(?(VERSION>=10.04)yes|no)/ + yesno + +/(?(VERSION=8)yes){3}/BI,aftertext + yesno + +/(?(VERSION=8)yes|no){3}/I + yesnononoyes +\= Expect no match + yesno + +/(?:(?abc)|xyz)(?(VERSION)yes|no)/I + abcyes + xyzno +\= Expect no match + abcno + xyzyes + +/(?(VERSION<10)yes|no)/ + +/(?(VERSION>10)yes|no)/ + +/(?(VERSION>=10.0.0)yes|no)/ + +/(?(VERSION=10.101)yes|no)/ + +# We should see the starting code unit, required code unit, and minimum length set for this regex: +/abcd/I + +# None of the following three should have the starting code unit, required code unit, and minimum length set: +/abcd/I,no_start_optimize + +/abcd/I,start_optimize_off + +/abcd/I,optimization_none + +/(|ab)*?d/I + abd + xyd + +/(|ab)*?d/I,no_start_optimize + abd + xyd + +/\k*(?aa)(?bb)/match_unset_backref,dupnames + aabb + +/(((((a)))))/parens_nest_limit=2 + +/abc/replace=XYZ + 123123 + 123abc123 + 123abc123abc123 + 123123\=zero_terminate + 123abc123\=zero_terminate + 123abc123abc123\=zero_terminate + +/abc/g,replace=XYZ + 123abc123 + 123abc123abc123 + +/abc/replace=X$$Z + 123abc123 + +/abc/g,replace=X$$Z + 123abc123abc123 + +/a(b)c(d)e/replace=X$1Y${2}Z + "abcde" + +/a(b)c(d)e/replace=X$1Y${2}Z,global + "abcde-abcde" + +/a(?b)c(?d)e/replace=X$ONE+${TWO}Z + "abcde" + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z + "abcde-abcde-" + +/abc/replace=a$++ + 123abc + +/abc/replace=a$bad + 123abc + +/abc/replace=a${A234567890123456789_123456789012}z + 123abc + +/abc/replace=a${A23456789012345678901234567890123}z + 123abc + +/abc/replace=a${bcd + 123abc + +/abc/replace=a${b+d}z + 123abc + +/abc/replace=[10]XYZ + 123abc123 + +/abc/replace=[9]XYZ + 123abc123 + +/abc/replace=xyz + 1abc2\=partial_hard + +/abc/replace=xyz + 123abc456 + 123abc456\=replace=pqr + 123abc456abc789 + 123abc456abc789\=g + 
+/(?<=abc)(|def)/g,replace=<$0> + 123abcxyzabcdef789abcpqr + +/./replace=$0 + a + +/(.)(.)/replace=$2+$1 + abc + +/(?.)(?.)/replace=$B+$A + abc + +/(.)(.)/g,replace=$2$1 + abcdefgh + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry + apple lemon blackberry\=substitute_overflow_length + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + +/"(*:fruit" 00 "juice)apple"/hex,g,replace=${*MARK} + apple lemon blackberry + +/abc/ + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + +/abc/ + 123abc123\=replace=XY + 123abc123\=replace=[9]XY + 123abc123\=replace=[9]XY,substitute_literal +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length + 123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[6]XY,substitute_overflow_length + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[5]XY,substitute_overflow_length + 
123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[4]XY,substitute_overflow_length + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[3]XY,substitute_overflow_length + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal + 123abc123\=replace=[2]XY,substitute_overflow_length + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal + +/abc/substitute_literal + 123abc123\=replace=XYZ + 123abc123\=replace=[10]XYZ +\= Expect error + 123abc123\=replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[9]XYZ + 123abc123\=substitute_overflow_length,replace=[6]XYZ + 123abc123\=substitute_overflow_length,replace=[1]XYZ + 123abc123\=substitute_overflow_length,replace=[0]XYZ + +/a(b)c/ + 123abc123\=replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[9]x$1z + 123abc123\=substitute_overflow_length,replace=[6]x$1z + 123abc123\=substitute_overflow_length,replace=[1]x$1z + 123abc123\=substitute_overflow_length,replace=[0]x$1z + +/a(b)c/substitute_extended + ZabcZ\=replace=>\1< + ZabcZ\=replace=>\2< + ZabcZ\=replace=>\8< + ZabcZ\=replace=>${1}< + ZabcZ\=replace=>${ 1 }< + ZabcZ\=replace=>${2}< + ZabcZ\=replace=>${8}< + ZabcZ\=replace=>$<1>< + ZabcZ\=replace=>$< 1 >< + ZabcZ\=replace=>$<2>< + ZabcZ\=replace=>$<8>< + ZabcZ\=replace=>\g<-1>< + ZabcZ\=replace=>\g<0>< + ZabcZ\=replace=>\g<1>< + ZabcZ\=replace=>\g< 1 >< + ZabcZ\=replace=>\g<2>< + ZabcZ\=replace=>\g<8>< + +/(*:pear)apple/substitute_extended + ZappleZ\=replace=>${*MARK}< + ZappleZ\=replace=>$<*MARK>< + ZappleZ\=replace=>\g<*MARK>< + +/a(?b)c/substitute_extended + ZabcZ\=replace=>${named}< + ZabcZ\=replace=>${noexist}< + ZabcZ\=replace=>${}< + ZabcZ\=replace=>${ }< + ZabcZ\=replace=>${ named }< + ZabcZ\=replace=>$< + ZabcZ\=replace=>$< + ZabcZ\=replace=>$<>< + ZabcZ\=replace=>$< >< + ZabcZ\=replace=>$< named >< + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g<>< + 
ZabcZ\=replace=>\g< >< + ZabcZ\=replace=>\g< named >< + +/a(b)c/substitute_extended + ZabcZ\=replace=>${1:+ yes : no } + ZabcZ\=replace=>${1:+ \o{100} : \o{100} } + ZabcZ\=replace=>${1:+ \o{Z} : no } + ZabcZ\=replace=>${1:+ yes : \o{Z} } + ZabcZ\=replace=>${1:+ \g<1> : no } + ZabcZ\=replace=>${1:+ yes : \g<1> } + ZabcZ\=replace=>${1:+ \g<1 : no } + ZabcZ\=replace=>${1:+ yes : \g<1 } + ZabcZ\=replace=>${1:+ $<1> : no } + ZabcZ\=replace=>${1:+ yes : $<1> } + ZabcZ\=replace=>${1:+ $<1 : no } + ZabcZ\=replace=>${1:+ yes : $<1 } + +/a(b)c/substitute_extended + ZabcZ\=replace=>${ + ZabcZ\=replace=>${1 + ZabcZ\=replace=>${1Z + ZabcZ\=replace=>${1; + ZabcZ\=replace=>$< + ZabcZ\=replace=>$<1 + ZabcZ\=replace=>$<1Z + ZabcZ\=replace=>$<1; + ZabcZ\=replace=>\g< + ZabcZ\=replace=>\g<1 + ZabcZ\=replace=>\g<1Z + ZabcZ\=replace=>\g<1; + +"((?=(?(?=(?(?=(?(?=()))))))))" + a + +"(?(?=)==)(((((((((?=)))))))))" +\= Expect no match + a + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall + +/x(?=ab\K)/allow_lookaround_bsk + xab\=get=0 + xab\=copy=0 + xab\=getall + +/(?a)|(?b)/dupnames + a\=ovector=1,copy=A,get=A,get=2 + a\=ovector=2,copy=A,get=A,get=2 + b\=ovector=2,copy=A,get=A,get=2 + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall + +/^abc/info + +/^abc/info,no_dotstar_anchor + +/^abc/info,dotstar_anchor_off + +# For comparison with the following tests, which disable automatic dotstar anchoring +/.*abc/BI + +/.*abc/BI,dotstar_anchor_off + +/.*abc/BI,start_optimize_off + +/.*abc/BI,optimization_none + +/.*abc/BI,no_dotstar_anchor + +/.*\d/info,auto_callout +\= Expect no match + aaa + +/.*\d/info,no_dotstar_anchor,auto_callout +\= Expect no match + aaa + +/.*\d/dotall,info + +/.*\d/dotall,no_dotstar_anchor,info + +/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info + +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?:(?a)|b)(?()A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?)(?:(?a)|b)(?()A|B)'dupnames + aA123\=ovector=1 + aA123\=ovector=2 + aA123\=ovector=3 + 
+'^(?:(?X)|)(?:(?a)|b)\k{AA}'dupnames + aa123\=ovector=1 + aa123\=ovector=2 + aa123\=ovector=3 + +/(?(?J)(?1(111111)11|)1|1|)(?()1)/ + +/(?(?J)(?))(?-J)\k/ + +# Quantifiers are not allowed on condition assertions, but are otherwise +# OK in conditions. + +/(?(?=0)?)+/ + +/(?(?=0)(?=00)?00765)/ + 00765 + +/(?(?=0)(?=00)?00765|(?!3).56)/ + 00765 + 456 +\= Expect no match + 356 + +'^(a)*+(\w)' + g + g\=ovector=1 + +'^(?:a)*+(\w)' + g + g\=ovector=1 + +# These two pattern showeds up compile-time bugs + +"((?2){0,1999}())?" + +/((?+1)(\1))/B + +# Callouts with string arguments + +/a(?C"/ + +/a(?C"a/ + +/a(?C"a"/ + +/a(?C"a"bcde(?C"b")xyz/ + +/a(?C"a)b""c")/B + +/ab(?C" any text with spaces ")cde/B + abcde + 12abcde + +/^a(b)c(?C1)def/ + abcdef + +/^a(b)c(?C"AB")def/ + abcdef + +/^a(b)c(?C1)def/ + abcdef\=callout_capture + +/^a(b)c(?C{AB})def/B + abcdef\=callout_capture + +/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info + +/(?:a(?C`code`)){3}/B + +/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info + abcdefg + xyz123 + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B + abcdefg + xyz123 + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg + +/(?:a(?C`code`)){3}X/ + aaaXY + +# Binary zero in callout string +# a ( ? C ' x z ' ) b +/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info + abcdefgh + +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb +\= Expect no match + aaa + +# JIT gives a different error message for the infinite recursion + +"(*NO_JIT)((?2)+)((?1)){" + abcd{ + +# Perl fails to diagnose the absence of an assertion + +"(?(?.*!.*)?)" + +"X((?2)()*+){2}+"B + +"X((?2)()*+){2}"B + +/(?<=\bABQ(3(?-7)))/ + +/(?<=\bABQ(3(?+7)))/ + +";(?<=()((?3))((?2)))" + +# Perl loops on this (PCRE2 used to!) 
+ +/(?<=\Ka)/g,aftertext,allow_lookaround_bsk + aaaaa + +/(?<=\Ka)/altglobal,aftertext,allow_lookaround_bsk + aaaaa + +/((?2){73}(?2))((?1))/info + +/abc/ +\= Expect no match + \[9x!xxx(]{9999} + +/(abc)*/ + \[abc]{5} + +/^/gm + \n\n\n + +/^/gm,alt_circumflex + \n\n\n + +/((((((((x))))))))\81/ + xx1 + +/((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ + xx + +/\80/ + +/A\8B\9C/ + A8B9C + +/(?x:((?'a')) # comment (with parentheses) and | vertical +(?-x:#not a comment (?'b')) # this is a comment () +(?'c')) # not a comment (?'d')/info + +/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames + A23B + B32A + +# These are some patterns that used to cause buffer overflows or other errors +# while compiling. + +/.((?2)(?R)|\1|$)()/B + +/.((?3)(?R)()(?2)|\1|$)()/B + +/(\9*+(?2);\3++()2|)++{/ + +/\V\x85\9*+((?2)\3++()2)*:2/ + +/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames + +/(((?(X)){0,2}) (?'x'((?'X')((?'X')))))/dupnames + +/(((?(R)){0,2}) (?'x'((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" + +/A(?'')Z/ + +"(?J:(?|(?'R')(\k'R')|((?'R'))))" + +/(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ + +/^(?:(?(1)x|)+)+$()/B + +/[[:>:]](?<)/ + +/((?x)(*:0))#(?'/ + +/(?C$[$)(?<]/ + +/(?C$)$)(?<]/ + +/(?(R))*+/B + abcd + +/((?x)(?#))#(?'/ + +/((?x)(?#))#(?'abc')/I + +/[[:\\](?<[::]/ + +/[[:\\](?'abc')[a:]/I + +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" + +/()(?(R)0)*+/B + +/(?R-:(?>abcd<< + +/abcd/g,replace=\$1$2\,substitute_literal + XabcdYabcdZ + 
+/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended + abcDE + +/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended + Hello between wORLD + +/abcd/replace=xy\kz,substitute_extended + abcd + +/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2} + ab + ac + ab\=replace=${1:+$1\:$1:$2} + ac\=replace=${1:+$1\:$1:$2} + >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal + +/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2} + ab + ac + +/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}< + a + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + XbY\=replace=\Ux${1:+$1$1}y + +/a/substitute_extended,replace=${*MARK:+a:b} + a + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended + +/abcd/substitute_extended,replace=>$1< + abcd + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + [b] +\= Expect error + (a)\=ovector=1 + +/(a)|(b)/replace=<$1> +\= Expect error + b + +/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + +/abcd/replace=wxyz,substitute_matched + abcd + pqrs + +/abcd/g + >abcd1234abcd5678<\=replace=wxyz,substitute_matched + +/abc/substitute_extended,replace=>\045< + abc + +/abc/substitute_extended,replace=>\45< + abc + +/abc/substitute_extended,replace=>\o{45}< + abc + +/abc/substitute_extended,replace=>\845< + abc + +/a(b)(c)/substitute_extended,replace=>\1< + abc + +/a(b)(c)/substitute_extended,replace=>\2< + abc + +/a(b)(c)/substitute_extended,replace=>\3< + abc + +/a(?b)c/substitute_extended + abc\=replace=>${namED_1}< + +/a(?b)c/substitute_extended + abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + +/abc/substitute_extended + abc\=replace=\a\b\e\f\n\r\t\v\\ + +/a(b)c/ + LabcR\=replace=>$&< + LabcR\=replace=>$`< + LabcR\=replace=>$'< + LabcR\=replace=>$_< + +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I + +/((p(?'K/ + 
+/((p(?'K/no_auto_capture + +/abc/replace=A$3123456789Z + abc + +/(?$1<,substitute_unset_empty + cat + xbcom + +/a|(b)c/ + cat\=replace=>$1< + cat\=replace=>$1<,substitute_unset_empty + xbcom\=replace=>$1<,substitute_unset_empty + +/a|(b)c/substitute_extended + cat\=replace=>${2:-xx}< + cat\=replace=>${2:-xx}<,substitute_unknown_unset + cat\=replace=>${X:-xx}<,substitute_unknown_unset + +/a|(?'X'b)c/replace=>$X<,substitute_unset_empty + cat + xbcom + +/a|(?'X'b)c/replace=>$Y<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/a|(b)c/replace=>$2<,substitute_unset_empty + cat + cat\=substitute_unknown_unset + cat\=substitute_unknown_unset,-substitute_unset_empty + +/()()()/use_offset_limit + \=ovector=11000000000 + \=callout_fail=11000000000 + \=callout_fail=1:11000000000 + \=callout_data=11000000000 + \=callout_data=-11000000000 + \=offset_limit=1100000000000000000000 + \=copy=11000000000 + +/(*MARK:A\x00b)/mark + abc + +/(*MARK:A\x00b)/mark,alt_verbnames + abc + +/"(*MARK:A" 00 "b)"/mark,hex + abc + +/"(*MARK:A" 00 "b)"/mark,hex,alt_verbnames + abc + +/efg/hex + +/eff/hex + +/effg/hex + +/(?J)(?'a'))(?'a')/ + +/(?<=((?C)0))/ + 9010 +\= Expect no match + abc + +/aaa/ +\[abc]{10000000000000000000000000000} +\[a]{3} + +/\[AB]{6000000000000000000000}/expand + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex + +/'(*'/hex + +/'('/hex + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. 
+ +/([ab])...(?<=\1)z/ + a11az + b11bz +\= Expect no match + b11az + +/(?|([ab]))...(?<=\1)z/ + +/([ab])(\1)...(?<=\2)z/ + aa11az + +/(a\2)(b\1)(?<=\2)/ + +/(?[ab])...(?<=\k'A')z/ + a11az + b11bz +\= Expect no match + b11az + +/(?[ab])...(?<=\k'A')(?)z/dupnames + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + +/ab(?C1)c/auto_callout + abc + +/'ab(?C1)c'/hex,auto_callout + abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. + +/[a-[:digit:]]+/ + a-a9-a + +/[A-[:digit:]]+/ + A-A9-A + +/[a-\d]+/ + a-a9-a + +/(?abc)(?(R)xyz)/B + +/(?abc)(?(R)xyz)/B + +/(?=.*[A-Z])/I + +/()(?<=(?0))/ + +/(?*?\g'0/use_length + +/.>*?\g'0/ + +/{„Í„ÍÍ„Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́ÍÍ„Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 + +"(?(?C))" + +/(?(?(?(?(?(?))))))/ + +/(?<=(?1))((?s))/anchored + +/(*:ab)*/ + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc + +//replace=0 + \=offset=7 + +/(?<=\G.)/g,replace=+ + abc + +".+\QX\E+"B,no_auto_possess + +".+\QX\E+"B,auto_callout,no_auto_possess + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 + +/()(\g+65534)/ + +/()(\g+65533)/ + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I + +/(a)(b)\2\1\1\1\1/I + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames + +// + \=ovector=7777777777 + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD + +"(?1){2}(a)"B + +"(?1){2,4}(a)"B + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + AZBDZ + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + +'(?:a(*:aa))b|ac' mark + ac + +/(R?){65}/ + (R?){65} + +/\[(a)]{60}/expand + aaaa + +/(?=999)yes)^bc/I + +# This should not be anchored. 
+ +/(?(VERSION>=999)yes|no)^bc/I + +/(*LIMIT_HEAP=0)xxx/I + +/(*LIMIT_HEAP=123/use_length + +/(*LIMIT_MATCH=/use_length + +/(*CRLF)(*LIMIT_DEPTH=/use_length + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length + +/\d{0,3}(*:abc)(?C1)xxx/callout_info + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). + +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb + cc + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX + +/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X9 + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X + X\n + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X + XX + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps + +/^X\R+?Z/no_start_optimize,no_auto_possess 
+\= Expect no match + X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX + X\n\r\n + X\n\rY + X\n\nY + X\n\x{0c}Y + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t + XYY + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 + XYY + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 + X9Y + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n + X\nY + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A + X++ + +/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. + Xaa + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t + X\tY + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n + XYY + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX +\= Expect no match + XAB + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI + +/[a b c]/BxxxI + +/[a b c]/B,extended_more + +/[ a b c ]/B,extended_more + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B + +# Unsetting /x also unsets /xx + +/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B + +/(a)(?-n:(b))(c)/nB + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. 
+ +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal + +/\N{\c/IB,bad_escape_is_literal + +/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal + +/[Q-\N]/B,bad_escape_is_literal + +/[\s-_]/bad_escape_is_literal + +/[_-\s]/bad_escape_is_literal + +/[\B\R\X]/B + +/[\B\R\X]/B,bad_escape_is_literal + +/[A-\BP-\RV-\X]/B + +/[A-\BP-\RV-\X]/B,bad_escape_is_literal + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + +/a\b(c/literal,caseless + a\\b(c + a\\B(c + +/a\b(c/literal,firstline + XYYa\\b(c +\= Expect no match + X\na\\b(c + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 +\= Expect no match + XXXXa\\b?c\=offset_limit=3 + +/a\b(c/literal,anchored,endanchored + a\\b(c +\= Expect no match + Xa\\b(c + a\\b(cX + Xa\\b(cX + +//literal,extended + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c + +/(*CR)abc/literal + (*CR)abc + +/cat|dog/I,match_word + the cat sat +\= Expect no match + caterpillar + snowcat + syndicate + +/(cat)|dog/I,match_line,literal + (cat)|dog +\= Expect no match + the cat sat + caterpillar + snowcat + syndicate + +# Confirm that the pcre2_set_optimize API does not have any undesired effect on literal patterns +/(cat)|dog/I,literal,auto_possess_off + (cat)|dog +\= Expect no match + the cat sat + +/(cat)|dog/I,literal,dotstar_anchor_off + (cat)|dog +\= Expect no match + the cat sat + +/(cat)|dog/I,literal,optimization_none + (cat)|dog +\= Expect no match + the cat sat + +# These should result in errors, since it is not permitted to use the +# PCRE2_NO_AUTO_POSSESS and PCRE2_NO_DOTSTAR_ANCHOR options on a literal pattern +/(cat)|dog/literal,no_auto_possess + +/(cat)|dog/literal,no_dotstar_anchor + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard +\= Expect no match + Not a whole line + +# Perl gets this wrong, failing to capture 'b' in group 1. 
+ +/^(b+|a){1,2}?bc/ + bbc + +# And again here, for the "babc" subject string. + +/^(b*|ba){1,2}?bc/ + babc + bbabc + bababc +\= Expect no match + bababbc + babababc + +/[[:digit:]-a]/ + +/[[:digit:]-[:print:]]/ + +/[\d-a]/ + +/[\H-z]/ + +/[\d-[:print:]]/ + +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I + ab + aaab + +"(?=(a))\1?b"I + ab + aaab + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra + +/\n/firstline + xyz\nabc + +/\nabc/firstline + xyz\nabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc + +/[abc]/firstline +\= Expect no match + \na + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?a?))^(?&optional_a)a$/B + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/B + +/^(a?)b(?1)a/B + +/^(a?)+b(?1)a/B + +/^(a?)++b(?1)a/B + +/^(a?)+b/B + +/(?=a+)a(a+)++b/B + +/(?<=(?=.){4,5}x)/B + +# Perl behaves differently with these when optimization is turned off + +/a(*PRUNE:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy + +/a(*THEN:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy + +/(?^x-i)AB/ + +/(?^-i)AB/ + +/(?x-i-i)/ + +/(?(?=^))b/I + abc + +/(?(?=^)|)b/I + abc + +/(?(?=^)|^)b/I + bbc +\= Expect no match + abc + +/(?(1)^|^())/I + +/(?(1)^())b/I + +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^()|^)+b/I,aftertext + bbc +\= Expect no match + abc + +/(?(1)^()|^)*b/I,aftertext + bbc + abc + xbc + +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^a()|^a)+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^|^(a))+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^a()|^a)*b/I,aftertext + abc + bbc + xbc + +/a(b)c|xyz/g,allvector,replace=<$0> + abcdefabcpqr\=ovector=4 + abxyz\=ovector=4 + abcdefxyz\=ovector=4 + 
+/a(b)c|xyz/allvector + abcdef\=ovector=4 + abxyz\=ovector=4 + +/a(b)c|xyz/g,replace=<$0>,substitute_callout + abcdefabcpqr + abxyzpqrabcxyz + 12abc34xyz99abc55\=substitute_stop=2 + 12abc34xyz99abc55\=substitute_skip=1 + 12abc34xyz99abc55\=substitute_skip=2 + +/a(b)c|xyz/g,replace=<$0> + abcdefabcpqr + abxyzpqrabcxyz + 12abc34xyz\=substitute_stop=2 + 12abc34xyz\=substitute_skip=1 + +/a(b)c|xyz/replace=<$0> + abcdefabcpqr + 12abc34xyz\=substitute_skip=1 + 12abc34xyz\=substitute_stop=1 + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[1]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[5]1234 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[1]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + abc + +/a(b)c/substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + abc + +/abc\rdef/ + abc\ndef + +/abc\rdef\x{0d}xyz/escaped_cr_is_lf + abc\ndef\rxyz +\= Expect no match + abc\ndef\nxyz + +/(?(*ACCEPT)xxx)/ + +/(?(*atomic:xx)xxx)/ + +/(?(*script_run:xxx)zzz)/ + +/foobar/ + the foobar thing\=copy_matched_subject + the foobar thing\=copy_matched_subject,zero_terminate + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + +/(*:XX)^abc/I + 
+/(*COMMIT:XX)^abc/I + +/(*ACCEPT:XX)^abc/I + +/abc/replace=xyz + abc\=null_context + +/abc/replace=xyz,substitute_callout + abc +\= Expect error message + abc\=null_context + +/\[()]{65535}()/expand + +/\[()]{65535}(?)/expand + +/a(?:(*ACCEPT))??bc/ + abc + axy + +/a(*ACCEPT)??bc/ + abc + axy + +/a(*ACCEPT:XX)??bc/mark + abc + axy + +/(*:\)?/ + +/(*:\Q \E){5}/alt_verbnames + +/(?=abc)/I + +/(?|(X)|(XY))\1abc/I + +/(?|(a)|(bcde))(c)\2/I + +/(?|(a)|(bcde))(c)\1/I + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames + +/((a|)+)+Z/I + +/((?=a))[abcd]/I + +/A(?:(*ACCEPT))?B/info + +/(A(*ACCEPT)??B)C/ + ABC + AXY + +/(?<=(?<=a)b)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/(?<=ab)c.*/I + abc\=ph +\= Expect no match + xbc\=ph + +/(?<=a(?<=a|a)c)/I + +/(?<=a(?<=a|ba)c)/I + +/(?<=(?<=a)b)(?.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + +/(*plb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(*naplb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(?<*(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + abcda\=offset=4 + +/(*non_atomic_positive_lookahead:ab)/B + +/(*non_atomic_positive_lookbehind:ab)/B + +/(*pla:ab+)/B + +/(*napla:ab+)/B + +/(*napla:)+/ + +/(*naplb:)+/ + +/(*napla:^x|^y)/I + +/(*napla:abc|abd)/I + +/(*napla:a|(.)(*ACCEPT)zz)\1../ + abcd + +/(*napla:a(*ACCEPT)zz|(.))\1../ + abcd + +/(*napla:a|(*COMMIT)(.))\1\1/ + aabc +\= Expect no match + abbc + +/(*napla:a|(.))\1\1/ + aabc + abbc + +/(*naplb:ab?c|PQ).../g + abcdefgacxyzPQR123 + +# ---- + +# Expect error (recursion => not fixed length) +/(\2)((?=(?<=\1)))/ + +/c*+(?<=[bc])/ + abc\=ph + ab\=ph + abc\=ps + ab\=ps + +/c++(?<=[bc])/ + abc\=ph + ab\=ph + +/(?<=(?=.(?<=x)))/ + abx + ab\=ph + bxyz + xyz 
+ +/\z/ + abc\=ph + abc\=ps + +/\Z/ + abc\=ph + abc\=ps + abc\n\=ph + abc\n\=ps + +/(?![ab]).*/ + ab\=ph + +/c*+/ + ab\=ph,offset=2 + +/\A\s*(a|(?:[^`]{28500}){4})/I + a + +/\A\s*((?:[^`]{28500}){4})/I + +/\A\s*((?:[^`]{28500}){4}|a)/I + a + +/(?a)(?()b)((?<=b).*)/B + +/(?(1)b)((?<=b).*)/B + +/(?(R1)b)((?<=b).*)/B + +/(?(DEFINE)b)((?<=b).*)/B + +/(?(VERSION=10.3)b)((?<=b).*)/B + +/[aA]b[cC]/IB + +/[cc]abcd/I + +/[Cc]abcd/I + +/[c]abcd/I + +/(?:c|C)abcd/I + +/(a)?a/I + manm + +/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/ + *abc_12345abc + +/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/ + *abc_12345abc + +/^((\1+)(?C)|\d)+133X$/ + 111133X\=callout_capture + +/abc/replace=xyz,substitute_replacement_only + 123abc456 + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only + "abcde-abcde-" + +/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only + abcdefabcpqr + abxyzpqrabcxyz + 12abc34xyz99abc55\=substitute_stop=2 + 12abc34xyz99abc55\=substitute_skip=1 + 12abc34xyz99abc55\=substitute_skip=2 + +/a(..)d/replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + xyzabcdxyzabcdxyz\=ovector=2 +\= Expect error + xyzabcdxyzabcdxyz\=ovector=1 + +/a(..)d/g,replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + xyzabcdxyzabcdxyz\=ovector=2 +\= Expect error + xyzabcdxyzabcdxyz\=ovector=1 + xyzabcdxyzabcdxyz\=ovector=1,substitute_unset_empty + +/55|a(..)d/g,replace=>$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty +\= Expect error + xyz55abcdxyzabcdxyz\=ovector=2 + +/55|a(..)d/replace=>$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/55|a(..)d/replace=>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/55|a(..)d/g,replace=>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + +/abc/replace=,caseless + XabcY + XABCY + +/abc/replace=[4],caseless + XabcY + XABCY + +/abc/replace=*,caseless + XabcY + XABCY + XabcY\=replace= + 
+/abc/replace=\U$0,substitute_extended,substitute_case_callout + XabcY +\= Expect not supported + XabcY\=null_context + +/a/substitute_extended,substitute_case_callout + XaY\=replace=\U$0 + XaY\=replace=\L$0 + XaY\=replace=\u\L$0 + XaY\=replace=\l\U$0 + +# Expect non-fixed-length error + +"(?<=X(?(DEFINE)(.*))(?1))." + +/\sxxx\s/tables=1 +\= Expect no match + AB\x{85}xxx\x{a0}XYZ + +/\sxxx\s/tables=2 + AB\x{85}xxx\x{a0}XYZ + +/^\w+/tables=2 + École + +/^\w+/tables=3 + École + +#loadtables ./testbtables + +/^\w+/tables=3 + École + +/"(*MARK:>" 00 "<).."/hex,mark,no_start_optimize + AB + A\=ph +\= Expect no match + A + +/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize + AB + +/(?(VERSION=0.0/ + +# Perl has made \K in lookarounds an error. PCRE2 now rejects as well, unless +# explicitly authorized. + +/(?=a\Kb)ab/ + +/(?=a\Kb)ab/allow_lookaround_bsk + ab + +/(?!a\Kb)ac/ + +/(?!a\Kb)ac/allow_lookaround_bsk + ac + +/^abc(?<=b\Kc)d/ + +/^abc(?<=b\Kc)d/allow_lookaround_bsk + abcd + +/^abc(?X<\=null_replacement + +/X+/replace=[20] + >XX<\=null_replacement + +# --------- + +/[Aa]{2}/BI + aabcd + +/A{2}/iBI + aabcd + +/[Aa]{2,3}/BI + aabcd + +-- + \[X]{-10} + +# Check imposition of maximum by match_data_create(). + +/abcd/ + abcd\=ovector=65536 + +# Use recurse to test \K and Mark in atomic scope. +/(?>this line\s*((?R)|)\K)/ + this line this line this line + +/(?>this line\s*((?R)|)(*MARK:A))/ + this line this line this line + +# Check use of NULL pattern with zero length. 
+ +//null_pattern,use_length + abc + +//null_pattern + +/bad null pattern/null_pattern,use_length + +/bad null pattern/null_pattern + +# -------- Variable length lookbehinds -------- +/12345(?<=\d{1,256})X/ + +/(?<=(\d{1,256}))X/max_varlookbehind=256 + 12345XYZ + +/12345(?<=a?bc)X/max_varlookbehind=0 + +/12345(?<=abc)X/max_varlookbehind=0 + +/(?a)|(?Pb))(?P=same))+/g,dupnames + bbbaaabaabb + +# -------- + +/ +/anchored, firstline + \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + +/ +/firstline + \x0a + abc\x0adef + +/|a(?0)/endanchored + aaaa + +/A +/extended + +/(*ACCEPT)+/B,auto_callout + +/a\z/ + a + a\=noteol + +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?|b?)./B + +/(?<=xy|a.b?|cd)/B + +# Tests for scan substring, a non Perl feature of PCRE2 + +# Parse errors first + +/(*scs:/ + +/(*scan_substring:(/ + +/(*scs:('name'/ + +/(*scs:(1)a|b)/ + +/(*scs:(0)a)/ + +/(*scan_substring:(1)a|b)/ + +/(*scs:()a|b)/ + +/(*scan_substring:()a|b)/ + +/()(*scs:(1)+a)/ + +/()(*scs:(1,1,1,1,1,1,1,1,2))/ + +/()()(*scs:(1,2,1,2,1,2,2,'XYZ'))/ + +# Tests for iterating scan_substring + +/(a)(*scs:(1)b)*c/B + +/(a)(*scs:(1)b)*?c/B + +/(a)(*scs:(1)b)*+c/B + +/(a)(*scs:(1)b)+c/B + +/(a)(*scs:(1)b)+?c/B + +/(a)(*scs:(1)b)++c/B + +/(a)(*scs:(1)b)?c/B + +/(a)(*scs:(1)b)??c/B + +/(a)(*scs:(1)b)?+c/B + +/(a)(*scs:(1)b){3}c/B + +/(a)(*scs:(1)b){3,5}?c/B + +/(a)(*scs:(1)b){3,}+c/B + +/(\w++)=(?(*scs:(1)(abc))pqr|xyz)(\w++)/ + +# Tests for scan_substring + +/([a-z]++)(*scs:(1)(stx)|(ne))(.)/B + ##string##next!## + __aastxaa:__ + __abababab:__ + +/(?[a-z]++)##(*scan_substring:('XX').*(..)$)\2/B + ##abcd##abcd##cd## + ##abcd##abcd##abcd## + +/([a-z])([a-z]++)(#+)(*scs:(2)(ab.))/ + xab## + yabc### + zababc#### + +/(?:(?[a-z]++)|(?[0-9]++)|$)(*scan_substring:('YYY')((?.).*\k$))/dupnames + $$abacd$$112345$$abca$$ + $$abcdeaf$$1234567819$$123456781$$ + +/([a-zA-Z]+)(*scs:(1).*?(?[A-Z]+)(*scan_substring:('ABC').*(.)\3))#+/ + 
##abABCtuTUVXz##abCDEFGxyCDEEFGhi## + ##abAABCtuTUVXXz!!abCDEFGxyCDEFGGhi## + +/([a-zA-Z]+)(*scs:(1)(xy|ab(*ACCEPT)cd))/B + ##cdefgh##cdeabxy## + +/(?[a-zA-Z]+)(*scs:('AA')(ab(*ACCEPT)cd|xy))/B + ##cdefgh##cdeabxy## + +/([a-z]++)##(*scs:(1)(abc))?!/ + ##xyz##abc##! + ##xyz##! + ##xyz## + +/([a-z]++)##(*scs:(1)(abc))??(?(2)!|:)/ + ##abc##abc##! + ##abc##xyz##: + ##abc### + +/([a-z]++)##(*scs:(1)(abc)|xyz){8}(?(2)!|:)/ + ##abc##abc##! + ##abc##xyz##: + ##nnn##! + ##nnn##: + +/[A-Z]{3}([A-Z]++)#(*scs:(1)(?<=BC)XY)#/ + ABCXY##AKCXY## + +/()(\w++)=(*scs:(2)(?=abc))(\w++)/ + xabcx=pqr. + +/(\d++)(*scs:(1)\d+\z)(\w+)/ + X123XYZ + +/(\d++)(*scs:(1)\d+\Z)(\w+)/ + X123XYZ + +/(\d++)(*scs:(1)\d+$)(\w+)/ + X123XYZ + +/([a-z]{2})[a-z](*scs:(1)(.*?))\2$/ + abcab + abcabc + +/^(([a-z]([a-z]*+))(*scs:(2).(?=(?1)|$)\3)|#){5}/ + abcdefg#hijk#! + abcdefg#hijk#lmnopqr# + +/(*scs:(1)a)(a)|x/ + a + x + +/(*scs:()a)(?a)(?b)(?c)(?d)|x/dupnames + abcd + x + +/(*scs:(1)a)?(a)/ + b + a + +/(*scs:(1)a)??(a)/ + b + a + +# Custom backtrack, goes back n - 1 characters in the input (n=8) +/x(?|(*scs:(1)(?<=(.)))|()){8}/ + abcdefghx + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE)x)).+|(.+)/ + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr)x)).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr))).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT)x)).+|(.+)/ + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr)x)).+|(.+)/mark + abcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr))).+|(.+)/mark + abcdef + +/(abc)(def)(*scs:(1)(*scs:(2)de(*SKIP)x)).+|(.+)/ + abcdefghi + +/(abc)(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(.+)/ + abcdefghi + +/(?<=(abc))(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(ef.+)/ + abcdefghi + +/(abc)(def)(*scs:(2)(?:(*scs:(1)abc(*SKIP:notfound)x|abcd|(abc)))).+/ + abcdefghi + +/(abc)(def)(*MARK:markstr)(*scs:(2)(?:(*scs:(1)abc(*SKIP:markstr)x))).+|(.+)/ + abcdefghi + 
+/^([a-z]++)(?:((?6))|((?7))|((?8))|(#))(?(DEFINE)((*scs:(1)abc(*PRUNE)d))((*scs:(1)abc(*COMMIT)e))((*scs:(1)abc(*SKIP)f)))/ + abcd# + abce# + abcf# + abc# + +/\b(\w++)(*scs:(1)^)/ + sausages and mash +\= Expect no match + !sausages and mash + +/(\b\w{3,}+\b)(*scs:(1)\W*+(?:((.)\W*+(?2)\W*+\3|)|((.)\W*+(?4)\W*+\5|\W*+.\W*+))\W*+$)/ig + ipsum lorem revel level able was I ere I saw Elba + +/(?:(?'A'a)|(?b))(*scs:('A')b)c/dupnames + abc + +# Relative reference +/(xyz)(abc)(*scs:(-1)abc)(*scs:(-2)\1)/ + >xyzabc< + +/^([a-z]++)#(*scs:(1)a|ab|abc|abcd|abcde|abcdef|(abcdefg))\2/ + abcdefg#abcdefg + +/^([a-z]++)(*scs:(1)(a+)(*THEN)b|(a+)(*THEN)c|(aa))/ + aaaax + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b)|(a+)(*THEN)c|(aa))/ + aaaax + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b))?/ + aaaax + +/^([a-z]++)(*scs:(1)(abc|(a+)(*THEN)b))?/ + aaaax + +/^(?:(.){20,30}#|([a-z]++)(*scs:(1)(a+)(*THEN)b){20,30}#|(.){20,30}!)/ + aaaaaaaaaaaaaaaaaaaaaaaaab! + +# List of captures + +/(?:(abc)|(?def)|ghi)(*scs:(1,'PP').(.))/B + abc + def + ghi + +/(?:(?abc)|(?def)|(ghi)|(?'NN'jkl)|mno)(*scs:('MM',3,).(.))/B,dupnames + abc + def + ghi + jkl + mno + +/f(?:(*scs:(+1,+2)(?<=(.)))|()){16}/ + 1234567890abcdef + 1ffffffffffffff + +/(?a)(*scan_substring:(1,'AA',1,)a)b/B + ab + ac + +/()()()(?<=ab(*scs:(1,2,3))cd)xyz/ + abcdxyz + +/()()()(?<=ab(*ACCEPT)(*scs:(1,2,3))cd|efg)xyz/ + abxyz + efgxyz + +# Tests for pcre2_set_optimize() + +/abc/I,optimization_none + +/abc/I,optimization_none,auto_possess + +/abc/I,optimization_none,dotstar_anchor,auto_possess + +/abc/I,optimization_none,start_optimize + +/abc/I,dotstar_anchor_off,optimization_full + +# If pcre2_set_optimize() is used to turn on some optimization, but at the same time, +# the compile options word turns it off... 
the compile options word "wins": + +/abc/I,no_auto_possess,auto_possess + +/abc/I,no_dotstar_anchor,dotstar_anchor + +/abc/I,no_start_optimize,start_optimize + +# -------------- + +# larger than GROUP_MAX, smaller than INT_MAX +/a\800000b/ + +# coming up on INT_MAX... (used to succeed with \8 being literal 8) +/a\800000000b/ + +# over INT_MAX (used to succeed with \8 being literal 8) +/a\8000000000b/ + +# -------------- + +# no_bs0 + +/a\0b\x00c\00d/ + a\x{00}b\x{00}c\x{00}d + +/a\0b/no_bs0 + +/b\x00c\00d/no_bs0 + b\x{00}c\x{00}d + +/abc/substitute_extended + abc\=replace=a\0b\x00c\00d + +/abc/substitute_extended,no_bs0 + abc\=replace=a\0b + abc\=replace=b\x00c\00d + +# python_octal + +/\0-\00-\01-\012-\0123-\123-\1234/ + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + +/\1/ + +/\12/ + \o{12} + +/abc/substitute_extended + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + abc\=replace=\1 + abc\=replace=\12 + +/\0-\00-\01-\012-\0123-\123-\1234/python_octal + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + +/\1/python_octal + +/\12/python_octal + +/abc/substitute_extended,python_octal + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + abc\=replace=\1 + abc\=replace=\12 + +# -------------- + +/a(?C)b/ + abc + abc\=callout_none + +/a(?C)b/never_callout + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[a[]/ + [ + +/[a[]/alt_extended_class + +/[a[B]/alt_extended_class + +/[a[B]]C/B,alt_extended_class + aC + BC +\= Expect no match + [C + +/[[A][B]]/B,alt_extended_class + A + B +\= Expect no match + [ + ] + +/[[A]||[B]]/B,alt_extended_class + A + B +\= Expect no match + C + +/[[^A][B]]/B,alt_extended_class + B + C +\= Expect no match + A + +/[^[A][B]]/B,alt_extended_class + C +\= Expect no match + A + B + +/[^[A]&&[B]]/B,alt_extended_class + A + B + C + +/[[AC]||[BC]]/B,alt_extended_class + A + B + C +\= Expect no match + D + +/[[AC]&&[BC]]/B,alt_extended_class + C +\= Expect no match + A + B + D + +/[[AC]--[BC]]/B,alt_extended_class + A +\= Expect no match + 
B + C + D + +/[[AC]~~[BC]]/B,alt_extended_class + A + B +\= Expect no match + C + D + +/[A[]]]/B,alt_extended_class + A + ] +\= Expect no match + [ + +/[A[^]]]/B,alt_extended_class + A + [ + C +\= Expect no match + ] + +/[A[]]/B,alt_extended_class,allow_empty_class + A +\= Expect no match + ] + [ + +/[A[^]]/B,alt_extended_class,allow_empty_class + A + C + [ + ] + +/[A-C--B]/B,alt_extended_class + A + C +\= Expect no match + B + +/[^A-C--B]/B,alt_extended_class + B +\= Expect no match + A + C + +/[[\d\D]--b]/B,alt_extended_class + a + c +\= Expect no match + b + +/[\dAC-E[:space:]&&[^z]]/B,alt_extended_class + 0 + A + C + D + E + \t +\= Expect no match + B + F + ; + +/[z||[^\dAC-E[:space:]]]/B,alt_extended_class + z + B + F + ; +\= Expect no match + 0 + A + C + D + E + \t + +/[ab||cd]/B,alt_extended_class + a + c +\= Expect no match + e + +/[[a]b||[c]d]/B,alt_extended_class + a + c +\= Expect no match + e + +/[a[b]||c[d]]/B,alt_extended_class + a + c +\= Expect no match + e + +/[-&&-]/B,alt_extended_class + - +\= Expect no match + a + +/[a-&&-a]/B,alt_extended_class + - + a +\= Expect no match + b + +/[-a&&a-]/B,alt_extended_class + - + a +\= Expect no match + b + +/[[a]-&&-[a]]/B,alt_extended_class + - + a +\= Expect no match + b + +/[-[a]&&[a]-]/B,alt_extended_class + - + a +\= Expect no match + b + +/(?xx:[ ^ a[ ^ b] ])/B,alt_extended_class + b +\= Expect no match + A + a + c + +/[ ^ a[ ^ b] ]/B,alt_extended_class + \x20 + ^ + a + b +\= Expect no match + c + +/[a-c--b]+/B,alt_extended_class + ac + a +\= Expect no match + b + +/[a-c--b]{2,3}/B,alt_extended_class + ac + cac +\= Expect no match + a + bb + +/x[a-c--b]+y/B,alt_extended_class + xacy + xaay + xay +\= Expect no match + zacy + xacz + xy + xby + +/[A--B--C--D]/B,alt_extended_class + A +\= Expect no match + B + +/[A--A--A]/B,alt_extended_class +\= Expect no match + A + B + +/[[A--A]--A]/B,alt_extended_class +\= Expect no match + A + B + +/[A--[A--A]]/B,alt_extended_class + A +\= Expect no match + B + 
+/[A--^B]/B,alt_extended_class + A +\= Expect no match + B + ^ + z + +/([a-z--n])\1/B,alt_extended_class + aa + zz +\= Expect no match + az + nn + +/(x[a-z--n]y)\1/B,alt_extended_class + xayxay + xzyxzy +\= Expect no match + xnyxny + +/(?:_\1|([a-z--n])){2}/B,alt_extended_class + a_a + z_z +\= Expect no match + a_z + n_n + +/(?:_\1|([a-z--n]))+/B,alt_extended_class + a_a + z_z + a_partial +\= Expect no match + n_n + +/[\d-[z]]/B,alt_extended_class + 1 + - + z + +/[\d-||z]/B,alt_extended_class + 1 + - + z + +/[z[\d-]]/B,alt_extended_class + 1 + - + z + +/[1-[z]]/B,alt_extended_class + 1 + - + z + +/[1-||z]/B,alt_extended_class + 1 + - + z + +/[z[1-]]/B,alt_extended_class + 1 + - + z + +/[a--/alt_extended_class + +/[a--a/alt_extended_class + +/[a--[a/alt_extended_class + +/[a--[a]/alt_extended_class + +/[a--[a]--/alt_extended_class + +/[a--]/alt_extended_class + +/[--a]/alt_extended_class + +/[^--a]/alt_extended_class + +/[--]/alt_extended_class + +/[a---b]/alt_extended_class + +/[a----b]/alt_extended_class + +/[a&&&b]/alt_extended_class + +/[a|||b]/alt_extended_class + +/[a~~~b]/alt_extended_class + +/[a~~~~b]/alt_extended_class + +/[a~~/alt_extended_class + +/[a~~~/alt_extended_class + +/[a~~~~/alt_extended_class + +/[a||b&&c]/alt_extended_class + +/[a||b~~c]/alt_extended_class + +/[a~~b&&c]/alt_extended_class + +/[a--b~~c]/alt_extended_class + +/[a--b&&c]/alt_extended_class + +/[a||b--c]/alt_extended_class + +/[a||[b--c]]/alt_extended_class + a + b +\= Expect no match + c + +/[\d-z]/B,alt_extended_class + +/[z-\d]/B,alt_extended_class + +/[abc -- b]+/B,alt_extended_class + acacbac + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a]]]]]]]]]]]]]]]/alt_extended_class + a +\= Expect no match + b + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[z]&&a]]]]]]]]]]]]]]]/alt_extended_class + 
+/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a[z]]]]]]]]]]]]]]]]/alt_extended_class + +/[z&/alt_extended_class + +/[[^]~~[^]]/B,alt_extended_class,allow_empty_class +\= Expect no match + a + +/[^[[^]~~[^]]]/B,alt_extended_class,allow_empty_class + a + +# -------------- + +# EXTENDED CHARACTER CLASSES (Perl) + +# allow-empty-class does nothing inside (?[...]) +/(?[ []] ])/B,allow_empty_class + ] + +# bad-escape-is-literal does nothing inside (?[...]) +/[ \j ]/ + +/[ /\ + +/(?[ \j ])/ + +/(?[ /\ + +/[ \j ]/bad_escape_is_literal + j +\= Expect no match + k + +/[ /\bad_escape_is_literal + +/(?[ \j ])/bad_escape_is_literal + +/(?[ /\bad_escape_is_literal + +/(?[ [\j] ])/bad_escape_is_literal + +/(?[ (\j) ])/bad_escape_is_literal + +# We can't test error cases in testinput1 + +/(?[])/ + +/(?[/ + +/(?[]/ + +/(?[\n/ + +/(?[\n]/ + +/(?[\n]z)/ + +/(?[\n] )/ + +/(?[(/ + +/(?[( / + +/(?[(\n/ + +/(?[ \n + () ])/ + +/(?[1])/ + +/(?[a])/ + +/(?[a-c])/ + +/(?[(])/ + +/(?[(\n])/ + +/(?[\n)])/ + +/(?[^\n])/ + +/(?[ \n \t ])/ + +/(?[ \d \t ])/ + +/(?[ [\n] \t ])/ + +/(?[ (\n) \t ])/ + +/(?[ [:alpha:] \t ])/ + +/(?[ \n + \t \d ])/ + +/(?[ !\n \t ])/ + +/(?[ \n [:alpha:] ])/ + +/(?[ \n [\d] ])/ + +/(?[ \n (\t) ])/ + +/(?[ \n !\t ])/ + +/(?[ \n \t ])/ + +/(?[:graph:])/ + +/(?[\Qn\E])/ + +# maximum depth tests + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&\n))))))))))))))])/ + \n +\= Expect no match + a + b + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+([\n]&\n))))))))))))))])/ + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&[\n]))))))))))))))])/ + 
+/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+((\n)&\n))))))))))))))])/ + +# -------------- + +/[[:digit:] -Z]/xx + +/[\d -Z]/xx + +/[[:digit:]\E-H]/ + +/[[:digit:]\Q\E-H]+/ + +/[z-[:space:]]/ + +/[z-\d]/ + +/[[:space:]-z]/ + +/[\d-z]/ + +/[\d-\w]/ + +/[\Q/ + +/[\Q/\ + +/[\Q\E/ + +/[\Q\n/ + +/[\Q\n]/ + +/[\Q\n/\ + +/[\Q\n\]/ + +/[\Q\n\E/ + +/[\Q\n\E]/ + \\ + n +\= Expect no match + \n + Q + +/[z\Q/ + +/[z\Q/\ + +/[z\Q\E/ + +/[/\ + +/[\n/ + +/[\E/ + +/[\^z]/B + +/[ \^]/B + +/[\\z]/B + +/[0-z]/B + +/[0\-z]/B + +/[]z]/B + +/[ \]]/B + +/[ --]/B + +/[A-\]]/B + +/[A-\\]/B + +/[\A]/ + +/[\Z]/ + +/[\z]/ + +/[\G]/ + +/[\K]/ + +/[\g<1>]/ + < + g +\= Expect no match + \\ + +/[\k<1>]/ + < + k +\= Expect no match + \\ + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + { + } + \x20 + 1 +\= Expect no match + \\ + +/[a\x{e1}]/iB + a + A + \x{e1} + +# -------------- + +# Attempt at full coverage of the substitution buffer-management code - not +# just covering each line in each macro, but covering each instantiation of each +# line in those macros. + +# +# CHECKMEMCPY tests +# +# Four conditions for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Additionally some CHECKMEMCPYs have a substitute_replacement_only branch. 
+# + +# pre-start-offset fragment +# no "overflow after previous overflow" condition +/a/ + XYaZ\=offset=2,replace=foo + XYaZ\=offset=2,replace=[1]foo + XYaZ\=offset=2,substitute_overflow_length,replace=[1]foo + XYaZ\=offset=2,substitute_replacement_only,replace=foo + +# pre-match fragment +/a/ + XYaZ\=replace=foo + XYaZ\=replace=[1]foo + XYaZ\=substitute_overflow_length,replace=[1]foo + XXYaZ\=offset=2,substitute_overflow_length,replace=[1]foo + XYaZ\=substitute_replacement_only,replace=foo + +# empty match bumpalong +/(?<=abc)(|DEF)/g + abcDEFabcZ\=replace=+ + abcDEFabcZ\=replace=[5]+ + abcDEFabcZ\=substitute_overflow_length,replace=[5]+ + abcDEFabcZ\=replace=[9]+ + abcDEFabcZ\=substitute_overflow_length,replace=[9]+ + abcDEFabcZ\=substitute_overflow_length,replace=[1]+ + abcDEFabcZ\=substitute_replacement_only,replace=+ + +# literal replacement +/a/ + XYaZ\=substitute_literal,replace=$0 + XYaZ\=substitute_literal,replace=[3]$0 + XYaZ\=substitute_literal,substitute_overflow_length,replace=[3]$0 + XYaZ\=substitute_literal,substitute_overflow_length,replace=[1]$0 + +# a MARK +/(*:pear)apple/ + XappleY\=replace=${*MARK} + XappleY\=replace=[3]${*MARK} + XappleY\=substitute_overflow_length,replace=[3]${*MARK} + XXappleY\=substitute_overflow_length,replace=[1]${*MARK} + +# a subject fragment +/a(bb)c/ + XabbcY\=replace=$1 + XabbcY\=replace=[2]$1 + XabbcY\=substitute_overflow_length,replace=[2]$1 + XXabbcY\=substitute_overflow_length,replace=[1]$1 + +# a zero-length subject fragment +/a()c/ + XacY\=replace=$1 + XacY\=replace=[2]$1 + XacY\=substitute_overflow_length,replace=[2]$1 + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\x{48} + XabcY\=replace=[1]\x{48} + XabcY\=substitute_overflow_length,replace=[1]\x{48} + XXabcY\=substitute_overflow_length,replace=[1]\x{48} + +# a replacement literal character +/abc/ + XabcY\=replace=Z + XabcY\=replace=[1]Z + XabcY\=substitute_overflow_length,replace=[1]Z + 
XXabcY\=substitute_overflow_length,replace=[1]Z + +# a cancelled substitution +# no "overflow after previous overflow" condition +/abc/substitute_skip=1 + XabcY\=replace=Z + XabcY\=replace=[3]Z + XabcY\=substitute_overflow_length,replace=[3]Z + XabcY\=substitute_replacement_only,replace=Z + +# the rest of the subject +/abc/ + XabcYY\=replace=Z + XabcYY\=replace=[3]Z + XabcYY\=substitute_overflow_length,replace=[3]Z + XabcYY\=substitute_overflow_length,replace=[1]Z + XabcYY\=substitute_replacement_only,replace=Z + +# the trailing NULL +/abc/ + XabcY\=replace=Z + XabcY\=replace=[3]Z + XabcY\=substitute_overflow_length,replace=[3]Z + XabcY\=substitute_overflow_length,replace=[1]Z + +# +# CHECKCASECPY tests +# +# The same four conditions for CHECKCASECPY as for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Also the condition where CHECKCASECPY isn't called due to a custom callout +# + +# a MARK +/(*:pear)apple/substitute_extended + XappleY\=replace=\U${*MARK} + XappleY\=replace=[3]\U${*MARK} + XappleY\=substitute_overflow_length,replace=[3]\U${*MARK} + XXappleY\=substitute_overflow_length,replace=[1]\U${*MARK} + XappleY\=substitute_case_callout,replace=\U${*MARK} + +# a subject fragment +/a(bb)c/substitute_extended + XabbcY\=replace=\U$1 + XabbcY\=replace=[2]\U$1 + XabbcY\=substitute_overflow_length,replace=[2]\U$1 + XXabbcY\=substitute_overflow_length,replace=[1]\U$1 + XabbcY\=substitute_case_callout,replace=\U$1 + +# a zero-length subject fragment +/a()c/substitute_extended + XacY\=replace=\U$1 + XacY\=replace=[2]\U$1 + XacY\=substitute_overflow_length,replace=[2]\U$1 + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\U\x{48} + XabcY\=replace=[1]\U\x{48} + XabcY\=substitute_overflow_length,replace=[1]\U\x{48} + XXabcY\=substitute_overflow_length,replace=[1]\U\x{48} + XabcY\=substitute_case_callout,replace=\U\x{48} + +# a replacement literal character 
+/abc/substitute_extended + XabcY\=replace=\UZ + XabcY\=replace=[1]\UZ + XabcY\=substitute_overflow_length,replace=[1]\UZ + XXabcY\=substitute_overflow_length,replace=[1]\UZ + XabcY\=substitute_case_callout,replace=\UZ + +# +# DELAYEDFORCECASE tests +# +# Some different triggering conditions for DELAYEDFORCECASE: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# if there was a previous overflow, then the case callout can't be invoked +# Also, the CASEERROR branch. +# Also, the branch for where chars_outstanding is zero, both with and without +# a previous overflow. +# + +# on set casing mode +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=\Uf\Lq + XabcY\=replace=[2]\Uf\Lq + XabcY\=substitute_overflow_length,replace=[2]\Uf\Lq + XabcY\=substitute_overflow_length,replace=[1]\Uf\Lq + XabcY\=replace=\U!\Lq + XabcY\=replace=\U\Lq + XXabcY\=substitute_overflow_length,replace=[1]\U\Lq + +# trailing fragment +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=f + XabcY\=replace=\Uf + XabcY\=replace=[2]\Uf + XabcY\=substitute_overflow_length,replace=[2]\Uf + XabcY\=substitute_overflow_length,replace=[1]\Uf + XabcY\=replace=\U! 
+ XabcY\=replace=\U + XXabcY\=substitute_overflow_length,replace=[1]\U + +# +# do_case_copy tests +# + +/aa/i,substitute_extended + XaaY\=replace=\Uaa\uaa\LAA\lAA\U\lAA\L\uaa\u\LaaA\l\UAAa + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lAA + XaaY\=replace=[2]\lAA + XaaY\=replace=[3]\lAA + XaaY\=replace=[4]\lAA + XaaY\=replace=[5]\lAA + XAAY\=replace=[1]\l$0 + XAAY\=replace=[2]\l$0 + XAAY\=replace=[3]\l$0 + XAAY\=replace=[4]\l$0 + XAAY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UAa + XaaY\=replace=[2]\l\UAa + XaaY\=replace=[3]\l\UAa + XaaY\=replace=[4]\l\UAa + XaaY\=replace=[5]\l\UAa + XAaY\=replace=[1]\l\U$0 + XAaY\=replace=[2]\l\U$0 + XAaY\=replace=[3]\l\U$0 + XAaY\=replace=[4]\l\U$0 + XAaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaA + XaaY\=replace=[2]\u\LaA + XaaY\=replace=[3]\u\LaA + XaaY\=replace=[4]\u\LaA + XaaY\=replace=[5]\u\LaA + XaAY\=replace=[1]\u\L$0 + XaAY\=replace=[2]\u\L$0 + XaAY\=replace=[3]\u\L$0 + XaAY\=replace=[4]\u\L$0 + XaAY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_overflow_length + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lAA + XaaY\=replace=[2]\lAA + XaaY\=replace=[3]\lAA + XaaY\=replace=[4]\lAA + XaaY\=replace=[5]\lAA + XAAY\=replace=[1]\l$0 + XAAY\=replace=[2]\l$0 + XAAY\=replace=[3]\l$0 + XAAY\=replace=[4]\l$0 + XAAY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UAa + XaaY\=replace=[2]\l\UAa + XaaY\=replace=[3]\l\UAa + XaaY\=replace=[4]\l\UAa + XaaY\=replace=[5]\l\UAa + XAaY\=replace=[1]\l\U$0 + XAaY\=replace=[2]\l\U$0 + XAaY\=replace=[3]\l\U$0 + XAaY\=replace=[4]\l\U$0 + XAaY\=replace=[5]\l\U$0 + 
XaaY\=replace=[1]\u\LaA + XaaY\=replace=[2]\u\LaA + XaaY\=replace=[3]\u\LaA + XaaY\=replace=[4]\u\LaA + XaaY\=replace=[5]\u\LaA + XaAY\=replace=[1]\u\L$0 + XaAY\=replace=[2]\u\L$0 + XaAY\=replace=[3]\u\L$0 + XaAY\=replace=[4]\u\L$0 + XaAY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_case_callout + XaaY\=replace=\Uaa\uaa\LBB\lBB\U\lBB\L\uaa\u\LaaB\l\UBBa + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lBB + XaaY\=replace=[2]\lBB + XaaY\=replace=[3]\lBB + XaaY\=replace=[4]\lBB + XaaY\=replace=[5]\lBB + XBBY\=replace=[1]\l$0 + XBBY\=replace=[2]\l$0 + XBBY\=replace=[3]\l$0 + XBBY\=replace=[4]\l$0 + XBBY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UBa + XaaY\=replace=[2]\l\UBa + XaaY\=replace=[3]\l\UBa + XaaY\=replace=[4]\l\UBa + XaaY\=replace=[5]\l\UBa + XBaY\=replace=[1]\l\U$0 + XBaY\=replace=[2]\l\U$0 + XBaY\=replace=[3]\l\U$0 + XBaY\=replace=[4]\l\U$0 + XBaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaB + XaaY\=replace=[2]\u\LaB + XaaY\=replace=[3]\u\LaB + XaaY\=replace=[4]\u\LaB + XaaY\=replace=[5]\u\LaB + XaBY\=replace=[1]\u\L$0 + XaBY\=replace=[2]\u\L$0 + XaBY\=replace=[3]\u\L$0 + XaBY\=replace=[4]\u\L$0 + XaBY\=replace=[5]\u\L$0 + +/aa/i,substitute_extended,substitute_case_callout,substitute_overflow_length + XaaY\=replace=[1]\uaa + XaaY\=replace=[2]\uaa + XaaY\=replace=[3]\uaa + XaaY\=replace=[4]\uaa + XaaY\=replace=[5]\uaa + XaaY\=replace=[1]\u$0 + XaaY\=replace=[2]\u$0 + XaaY\=replace=[3]\u$0 + XaaY\=replace=[4]\u$0 + XaaY\=replace=[5]\u$0 + XaaY\=replace=[1]\lBB + XaaY\=replace=[2]\lBB + XaaY\=replace=[3]\lBB + XaaY\=replace=[4]\lBB + XaaY\=replace=[5]\lBB + XBBY\=replace=[1]\l$0 + XBBY\=replace=[2]\l$0 + XBBY\=replace=[3]\l$0 + XBBY\=replace=[4]\l$0 + XBBY\=replace=[5]\l$0 + XaaY\=replace=[1]\l\UBa + XaaY\=replace=[2]\l\UBa + 
XaaY\=replace=[3]\l\UBa + XaaY\=replace=[4]\l\UBa + XaaY\=replace=[5]\l\UBa + XBaY\=replace=[1]\l\U$0 + XBaY\=replace=[2]\l\U$0 + XBaY\=replace=[3]\l\U$0 + XBaY\=replace=[4]\l\U$0 + XBaY\=replace=[5]\l\U$0 + XaaY\=replace=[1]\u\LaB + XaaY\=replace=[2]\u\LaB + XaaY\=replace=[3]\u\LaB + XaaY\=replace=[4]\u\LaB + XaaY\=replace=[5]\u\LaB + XaBY\=replace=[1]\u\L$0 + XaBY\=replace=[2]\u\L$0 + XaBY\=replace=[3]\u\L$0 + XaBY\=replace=[4]\u\L$0 + XaBY\=replace=[5]\u\L$0 + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l\U!a + XaaY\=replace=\l\Ua! + XaaY\=replace=\ufa + XaaY\=replace=[3]\ufa + XaaY\=replace=\l\Uaoo + XaaY\=replace=[4]\l\Uaoo + XaaY\=replace=\l\UPa + XaaY\=replace=[3]\l\UPa + XaaY\=replace=[4]\l\UPa + XaaY\=replace=\l\UPo + XaaY\=replace=[3]\l\UPo + XaaY\=replace=[4]\l\UPo + XaaY\=replace=\l\UPpp + XaaY\=replace=[4]\l\UPpp + XaaY\=replace=[5]\l\UPpp + +# +# special test-callback case transformation tests +# + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l! + XaaY\=replace=\ua\lB + XaaY\=replace=\LdDZ\UdDZ\ud\uD\uZ + XaaY\=replace=\uf\Uf\Lf\Us\Ls\uS\lS + XaaY\=replace=\LOO\LOQ\UOO\uo\lo + XaaY\=replace=\upq\upp\lpp\Upp\Lpp\lP\uP + XaaY\=replace=\ll\ul\Ul\LMmNn\UMmNn + XaaY\=replace=\Uac\Uaca\Uak\Uaka\Lck\LBK\LBKB\LBK \UK + Xaay\=replace=\u\Lqj\u\Lij\u\LIj\u\LiJ\u\LIJ\u\Liq\u\Lij\Uij\UiIjJ\LiIjJ + Xaay\=replace=\Uaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + +# -------------- + +# End of testinput2 --- pcre2-10.45.orig/.pc/CVE-2025-58050.patch/testdata/testoutput2 +++ pcre2-10.45/.pc/CVE-2025-58050.patch/testdata/testoutput2 @@ -0,0 +1,21830 @@ +# This set of tests is not Perl-compatible. It checks on special features +# of PCRE2's API, error diagnostics, and the compiled code of some patterns. +# It also checks the non-Perl syntax that PCRE2 supports (Python, .NET, +# Oniguruma). There are also some tests where PCRE2 and Perl differ, +# either because PCRE2 can't be compatible, or there is a possible Perl +# bug. 
+ +# NOTE: This is a non-UTF set of tests. When UTF support is needed, use +# test 5. + +#forbid_utf +#newline_default lf any anycrlf + +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex +------------------------------------------------------------------ + Bra + DE + Ket + End +------------------------------------------------------------------ + DE + 0: DE + +/(a)b|/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + +/abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + defabc + 0: abc + abc\=anchored + 0: abc +\= Expect no match + defabc\=anchored +No match + ABC +No match + +/^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc + abc\=anchored + 0: abc +\= Expect no match + defabc +No match + defabc\=anchored +No match + +/a+bc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/a*bc/I +Capture group count = 0 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/a{3}bc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 5 + +/(abc|a+z)/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 2 + +/^abc$/I +Capture group count = 0 +Compile options: +Overall options: anchored 
+First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc +\= Expect no match + def\nabc +No match + +/ab\idef/ +Failed: error 103 at offset 3: unrecognized character follows \ + +/(?X)ab\idef/ +Failed: error 111 at offset 2: unrecognized character after (? or (?- + +/x{5,4}/ +Failed: error 104 at offset 5: numbers out of order in {} quantifier + +/z{65536}/ +Failed: error 105 at offset 7: number too big in {} quantifier + +/[abcd/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\B]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\R]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\X]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[z-a]/ +Failed: error 108 at offset 3: range out of order in character class + +/^*/ +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/(abc/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?# abc/ +Failed: error 118 at offset 7: missing ) after (?# comment + +/(?z)abc/ +Failed: error 111 at offset 2: unrecognized character after (? 
or (?- + +/.*b/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 1 + +/.*?b/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 1 + +/cat|dog|elephant/I +Capture group count = 0 +Starting code units: c d e +Subject length lower bound = 3 + this sentence eventually mentions a cat + 0: cat + this sentences rambles on and on for a while and then reaches elephant + 0: elephant + +/cat|dog|elephant/I +Capture group count = 0 +Starting code units: c d e +Subject length lower bound = 3 + this sentence eventually mentions a cat + 0: cat + this sentences rambles on and on for a while and then reaches elephant + 0: elephant + +/cat|dog|elephant/Ii +Capture group count = 0 +Options: caseless +Starting code units: C D E c d e +Subject length lower bound = 3 + this sentence eventually mentions a CAT cat + 0: CAT + this sentences rambles on and on for a while to elephant ElePhant + 0: elephant + +/a|[bcd]/I +Capture group count = 0 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a|[^\dZ])/I +Capture group count = 1 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y [ \ ] ^ _ ` a b c d + e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 + \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 + \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 + \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 + \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf + \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/(a|b)*[\s]/I +Capture group count = 1 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b +Subject length lower bound = 1 + +/(ab\2)/ +Failed: error 115 at offset 4: reference to non-existent subpattern + +/{4,5}abc/ +Failed: error 109 at offset 4: quantifier does not follow a repeatable item + +/(a)(b)(c)\2/I +Capture group count = 3 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 4 + abcb + 0: abcb + 1: a + 2: b + 3: c + abcb\=ovector=0 + 0: abcb + 1: a + 2: b + 3: c + abcb\=ovector=1 +Matched, but too many substrings + 0: abcb + abcb\=ovector=2 +Matched, but too many substrings + 0: abcb + 1: a + abcb\=ovector=3 +Matched, but too many substrings + 0: abcb + 1: a + 2: b + abcb\=ovector=4 + 0: abcb + 1: a + 2: b + 3: c + +/(a)bc|(a)(b)\2/I +Capture group count = 3 +Max back reference = 2 +First code unit = 'a' +Subject length lower bound = 3 + abc + 0: abc + 1: a + abc\=ovector=0 + 0: abc + 1: a + abc\=ovector=1 +Matched, but too many substrings + 0: abc + abc\=ovector=2 + 0: abc + 1: a + aba + 0: aba + 1: + 2: a + 3: b + aba\=ovector=0 + 0: aba + 1: + 2: a + 3: b + aba\=ovector=1 +Matched, but too 
many substrings + 0: aba + aba\=ovector=2 +Matched, but too many substrings + 0: aba + 1: + aba\=ovector=3 +Matched, but too many substrings + 0: aba + 1: + 2: a + aba\=ovector=4 + 0: aba + 1: + 2: a + 3: b + +/abc$/I,dollar_endonly +Capture group count = 0 +Options: dollar_endonly +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc +\= Expect no match + abc\n +No match + abc\ndef +No match + +/(a)(b)(c)(d)(e)\6/ +Failed: error 115 at offset 16: reference to non-existent subpattern + +/the quick brown fox/I +Capture group count = 0 +First code unit = 't' +Last code unit = 'x' +Subject length lower bound = 19 + the quick brown fox + 0: the quick brown fox + this is a line with the quick brown fox + 0: the quick brown fox + +/the quick brown fox/I,anchored +Capture group count = 0 +Options: anchored +First code unit = 't' +Subject length lower bound = 19 + the quick brown fox + 0: the quick brown fox +\= Expect no match + this is a line with the quick brown fox +No match + +/ab(?z)cd/ +Failed: error 111 at offset 4: unrecognized character after (? or (?- + +/^abc|def/I +Capture group count = 0 +Starting code units: a d +Subject length lower bound = 3 + abcdef + 0: abc + abcdef\=notbol + 0: def + +/.*((abc)$|(def))/I +Capture group count = 3 +First code unit at start or follows newline +Subject length lower bound = 3 + defabc + 0: defabc + 1: abc + 2: abc + defabc\=noteol + 0: def + 1: def + 2: + 3: def + +/)/ +Failed: error 122 at offset 0: unmatched closing parenthesis + +/a[]b/ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[^aeiou ]{3,}/I +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 + 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ + \ ] ^ _ ` b c d f g h j k l m n p q r s t v w x y z { | } ~ \x7f \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 3 + co-processors, and for + 0: -pr + +/<.*>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: ghi + +/<.*?>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/<.*>/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/(?U)<.*>/I +Capture group count = 0 +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: + +/<.*?>/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + abcghinop + 0: ghi + +/={3,}/I,ungreedy +Capture group count = 0 +Options: ungreedy +First code unit = '=' +Last code unit = '=' +Subject length lower bound = 3 + abc========def + 0: === + +/(?U)={3,}?/I +Capture group count = 0 +First code unit = '=' +Last code unit = '=' +Subject length lower bound = 3 + abc========def + 0: ======== + +/(? 
+Overall options: anchored +First code unit = '1' +Subject length lower bound = 4 + +/(^b|(?i)^d)/I +Capture group count = 1 +Compile options: +Overall options: anchored +Starting code units: D b d +Subject length lower bound = 1 + +/(?s).*/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/[abcd]/I +Capture group count = 0 +Starting code units: a b c d +Subject length lower bound = 1 + +/(?i)[abcd]/I +Capture group count = 0 +Starting code units: A B C D a b c d +Subject length lower bound = 1 + +/(?m)[xy]|(b|c)/I +Capture group count = 1 +Starting code units: b c x y +Subject length lower bound = 1 + +/(^a|^b)/Im +Capture group count = 1 +Options: multiline +First code unit at start or follows newline +Subject length lower bound = 1 + +/(?i)(^a|^b)/Im +Capture group count = 1 +Options: multiline +First code unit at start or follows newline +Subject length lower bound = 1 + +/(a)(?(1)a|b|c)/ +Failed: error 127 at offset 3: conditional subpattern contains more than two branches + +/(?(?=a)a|b|c)/ +Failed: error 127 at offset 0: conditional subpattern contains more than two branches + +/(?(1a)/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(1a))/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(?i))/ +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) + +/(?(abc))/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/(?(? 
+Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + aaaaabbbbbcccccdef + 0: aaaaabbbbbcccccdef + 1: aaaaabbbbbcccccdef + 2: aaaaa + 3: b + 4: bbbbccccc + 5: def + +/(?<=foo)[ab]/I +Capture group count = 0 +Max lookbehind = 3 +Starting code units: a b +Subject length lower bound = 1 + +/(?^abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + def\nabc + 0: abc +\= Expect no match + defabc +No match + +/(?<=ab(c+)d)ef/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(?<=ab(?<=c+)d)ef/ +Failed: error 125 at offset 6: length of lookbehind assertion is not limited + +/The next three are in testinput2 because they have variable length branches/ + +/(?<=bullock|donkey)-cart/I +Capture group count = 0 +Max lookbehind = 7 +First code unit = '-' +Last code unit = 't' +Subject length lower bound = 5 + the bullock-cart + 0: -cart + a donkey-cart race + 0: -cart +\= Expect no match + cart +No match + horse-and-cart +No match + +/(?<=ab(?i)x|y|z)/I +Capture group count = 0 +Max lookbehind = 3 +May match empty string +Subject length lower bound = 0 + +/(?>.*)(?<=(abcd)|(xyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + alphabetabcd + 0: alphabetabcd + 1: abcd + endingxyz + 0: endingxyz + 1: + 2: xyz + +/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I +Capture group count = 0 +Max lookbehind = 4 +First code unit = 'Z' +Last code unit = 'Z' +Subject length lower bound = 2 + abxyZZ + 0: ZZ + abXyZZ + 0: ZZ + ZZZ + 0: ZZ + zZZ + 0: ZZ + bZZ + 0: ZZ + BZZ + 0: ZZ +\= Expect no match + ZZ +No match + abXYZZ +No match + zzz +No match + bzz +No match + +/(? 
+Overall options: anchored +Starting code units: a b +Subject length lower bound = 4 + adef\=get=1,get=2,get=3,get=4,getall + 0: adef + 1: a + 2: + 3: f + 1G a (1) +Get substring 2 failed (-55): requested value is not set + 3G f (1) +Get substring 4 failed (-49): unknown substring + 0L adef + 1L a + 2L + 3L f + bcdef\=get=1,get=2,get=3,get=4,getall + 0: bcdef + 1: bc + 2: bc + 3: f + 1G bc (2) + 2G bc (2) + 3G f (1) +Get substring 4 failed (-49): unknown substring + 0L bcdef + 1L bc + 2L bc + 3L f + adefghijk\=copy=0 + 0: adef + 1: a + 2: + 3: f + 0C adef (4) + +/^abc\00def/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 7 + abc\00def\=copy=0,getall + 0: abc\x00def + 0C abc\x00def (7) + 0L abc\x00def + +/word ((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ +)((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ )((?:[a-zA-Z0-9]+ +)?)?)?)?)?)?)?)?)?otherword/I +Capture group count = 8 +Contains explicit CR or LF match +First code unit = 'w' +Last code unit = 'd' +Subject length lower bound = 14 + +/.*X/IB +------------------------------------------------------------------ + Bra + Any* + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'X' +Subject length lower bound = 1 + +/.*X/IBs +------------------------------------------------------------------ + Bra + AllAny* + X + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'X' +Subject length lower bound = 1 + +/(.*X|^B)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Any* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit at start or follows 
newline +Subject length lower bound = 1 + +/(.*X|^B)/IBs +------------------------------------------------------------------ + Bra + CBra 1 + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/(?s)(.*X|^B)/IB +------------------------------------------------------------------ + Bra + CBra 1 + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + +/(?s:.*X|^B)/IB +------------------------------------------------------------------ + Bra + Bra + AllAny* + X + Alt + ^ + B + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + +/\Biss\B/I,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/iss/I,aftertext,altglobal +Capture group count = 0 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + 0: iss + 0+ ippi + +/\Biss\B/I,aftertext,altglobal +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/\Biss\B/Ig,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + 0: iss + 0+ ippi +\= Expect no match + Mississippi\=anchored +No match + +/(?<=[Ms])iss/Ig,aftertext +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 
0+ issippi + 0: iss + 0+ ippi + +/(?<=[Ms])iss/I,aftertext,altglobal +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'i' +Last code unit = 's' +Subject length lower bound = 3 + Mississippi + 0: iss + 0+ issippi + +/^iss/Ig,aftertext +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'i' +Subject length lower bound = 3 + ississippi + 0: iss + 0+ issippi + +/.*iss/Ig,aftertext +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 's' +Subject length lower bound = 3 + abciss\nxyzisspqr + 0: abciss + 0+ \x0axyzisspqr + 0: xyziss + 0+ pqr + +/.i./Ig,aftertext +Capture group count = 0 +Last code unit = 'i' +Subject length lower bound = 3 + Mississippi + 0: Mis + 0+ sissippi + 0: sis + 0+ sippi + 0: sip + 0+ pi + Mississippi\=anchored + 0: Mis + 0+ sissippi + 0: sis + 0+ sippi + 0: sip + 0+ pi + Missouri river + 0: Mis + 0+ souri river + 0: ri + 0+ river + 0: riv + 0+ er + Missouri river\=anchored + 0: Mis + 0+ souri river + +/^.is/Ig,aftertext +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 3 + Mississippi + 0: Mis + 0+ sissippi + +/^ab\n/Ig,aftertext +Capture group count = 0 +Contains explicit CR or LF match +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + ab\nab\ncd + 0: ab\x0a + 0+ ab\x0acd + +/^ab\n/Igm,aftertext +Capture group count = 0 +Contains explicit CR or LF match +Options: multiline +First code unit at start or follows newline +Last code unit = \x0a +Subject length lower bound = 3 + ab\nab\ncd + 0: ab\x0a + 0+ ab\x0acd + 0: ab\x0a + 0+ cd + +/^/gm,newline=any + a\rb\nc\r\nxyz\=aftertext + 0: + 0+ a\x0db\x0ac\x0d\x0axyz + 0: + 0+ b\x0ac\x0d\x0axyz + 0: + 0+ c\x0d\x0axyz + 0: + 0+ xyz + +/abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc|bac/I +Capture group count = 0 +Starting code units: a b +Last code 
unit = 'c' +Subject length lower bound = 3 + +/(abc|bac)/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(abc|(c|dc))/I +Capture group count = 2 +Starting code units: a c d +Last code unit = 'c' +Subject length lower bound = 1 + +/(abc|(d|de)c)/I +Capture group count = 2 +Starting code units: a d +Last code unit = 'c' +Subject length lower bound = 2 + +/a*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/a+/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/(baa|a+)/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'a' +Subject length lower bound = 1 + +/a{0,3}/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/baa{3,}/I +Capture group count = 0 +First code unit = 'b' +Last code unit = 'a' +Subject length lower bound = 5 + +/"([^\\"]+|\\.)*"/I +Capture group count = 1 +First code unit = '"' +Last code unit = '"' +Subject length lower bound = 2 + +/(abc|ab[cd])/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 3 + +/(a|.)/I +Capture group count = 1 +Subject length lower bound = 1 + +/a|ba|\w/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/abc(?=pqr)/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'r' +Subject length lower bound = 3 + +/...(?<=abc)/I +Capture group count = 0 +Max lookbehind = 3 +Subject length lower bound = 3 + +/abc(?!pqr)/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/ab./I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/ab[xyz]/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 
+ +/abc*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/ab.c*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/a.c*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 2 + +/.c*/I +Capture group count = 0 +Subject length lower bound = 1 + +/ac*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/(a.c*|b.c*)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/a.c*|aba/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 2 + +/.+a/I +Capture group count = 0 +Last code unit = 'a' +Subject length lower bound = 2 + +/(?=abcda)a.*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 2 + +/(?=a)a.*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/a(b)*/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/a\d*/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/ab\d*/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/a(\d)*/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/abcde{0,0}/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 + +/ab\d+/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + +/a(?(1)b)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Subject length lower bound = 2 + +/a(?(1)bag|big)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'g' +Subject length lower bound = 5 + +/a(?(1)bag|big)*(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Subject length lower bound = 2 
+ +/a(?(1)bag|big)+(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'g' +Subject length lower bound = 5 + +/a(?(1)b..|b..)(.)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 5 + +/ab\d{0}e/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 3 + +/a?b?/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + a + 0: a + b + 0: b + ab + 0: ab + \ + 0: +\= Expect no match + \=notempty +No match + +/|-/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + abcd + 0: + -abc + 0: + ab-c\=notempty + 0: - +\= Expect no match + abc\=notempty +No match + +/^.?abcd/I +Capture group count = 0 +Compile options: +Overall options: anchored +Last code unit = 'd' +Subject length lower bound = 4 + +/\( # ( at start + (?: # Non-capturing bracket + (?>[^()]+) # Either a sequence of non-brackets (no backtracking) + | # Or + (?R) # Recurse - i.e. 
nested bracketed string + )* # Zero or more contents + \) # Closing ) + /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (abcd) + 0: (abcd) + (abcd)xyz + 0: (abcd) + xyz(abcd) + 0: (abcd) + (ab(xy)cd)pqr + 0: (ab(xy)cd) + (ab(xycd)pqr + 0: (xycd) + () abc () + 0: () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + 0: (abcde(fsh)xyz(foo(bar))lmno) +\= Expect no match + abcd +No match + abcd) +No match + (abcd +No match + +/\( ( (?>[^()]+) | (?R) )* \) /Igx +Capture group count = 1 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd)pqr + 0: (ab(xy)cd) + 1: cd + 1(abcd)(x(y)z)pqr + 0: (abcd) + 1: abcd + 0: (x(y)z) + 1: z + +/\( (?: (?>[^()]+) | (?R) ) \) /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 3 + (abcd) + 0: (abcd) + (ab(xy)cd) + 0: (xy) + (a(b(c)d)e) + 0: (c) + ((ab)) + 0: ((ab)) +\= Expect no match + () +No match + +/\( (?: (?>[^()]+) | (?R) )? \) /Ix +Capture group count = 0 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + () + 0: () + 12(abcde(fsh)xyz(foo(bar))lmno)89 + 0: (fsh) + +/\( ( (?>[^()]+) | (?R) )* \) /Ix +Capture group count = 1 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: cd + +/\( ( ( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 2 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: cd + +/\( (123)? ( ( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: + 2: ab(xy)cd + 3: cd + (123ab(xy)cd) + 0: (123ab(xy)cd) + 1: 123 + 2: ab(xy)cd + 3: cd + +/\( ( (123)? 
( (?>[^()]+) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: + 3: cd + (123ab(xy)cd) + 0: (123ab(xy)cd) + 1: 123ab(xy)cd + 2: 123 + 3: cd + +/\( (((((((((( ( (?>[^()]+) | (?R) )* )))))))))) \) /Ix +Capture group count = 11 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(xy)cd) + 0: (ab(xy)cd) + 1: ab(xy)cd + 2: ab(xy)cd + 3: ab(xy)cd + 4: ab(xy)cd + 5: ab(xy)cd + 6: ab(xy)cd + 7: ab(xy)cd + 8: ab(xy)cd + 9: ab(xy)cd +10: ab(xy)cd +11: cd + +/\( ( ( (?>[^()<>]+) | ((?>[^()]+)) | (?R) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (abcd(xyz

qrs)123) + 0: (abcd(xyz

qrs)123) + 1: abcd(xyz

qrs)123 + 2: 123 + +/\( ( ( (?>[^()]+) | ((?R)) )* ) \) /Ix +Capture group count = 3 +Options: extended +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 2 + (ab(cd)ef) + 0: (ab(cd)ef) + 1: ab(cd)ef + 2: ef + 3: (cd) + (ab(cd(ef)gh)ij) + 0: (ab(cd(ef)gh)ij) + 1: ab(cd(ef)gh)ij + 2: ij + 3: (cd(ef)gh) + +/^[[:alnum:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^alnum:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^0-9A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 + \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 + \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 + \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 + \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:alpha:]]/IB +------------------------------------------------------------------ + Bra + ^ + [A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^alpha:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/[_[:alpha:]]/I +Capture group count = 0 +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:ascii:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ + \x7f +Subject length lower bound = 1 + +/^[[:^ascii:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^\x00-\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a + \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 + \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 + \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 + \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 + \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 + \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 + \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 + \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x09 ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x09 \x20 +Subject length lower bound = 1 + +/^[[:^blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^\x09 ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[\n\x0b\x0c\x0d[:blank:]]/I +Capture group count = 0 +Contains explicit CR or LF match +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/^[[:cntrl:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x00-\x1f\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x7f +Subject length lower bound = 1 + +/^[[:digit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + +/^[[:graph:]]/IB +------------------------------------------------------------------ + Bra + ^ + [!-~] + Ket + End 
+------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : + ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Subject length lower bound = 1 + +/^[[:lower:]]/IB +------------------------------------------------------------------ + Bra + ^ + [a-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:print:]]/IB +------------------------------------------------------------------ + Bra + ^ + [ -~] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ +Subject length lower bound = 1 + +/^[[:punct:]]/IB +------------------------------------------------------------------ + Bra + ^ + [!-/:-@[-`{-~] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? 
@ [ \ ] ^ + _ ` { | } ~ +Subject length lower bound = 1 + +/^[[:space:]]/IB +------------------------------------------------------------------ + Bra + ^ + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/^[[:upper:]]/IB +------------------------------------------------------------------ + Bra + ^ + [A-Z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +Subject length lower bound = 1 + +/^[[:xdigit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Fa-f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f +Subject length lower bound = 1 + +/^[[:word:]]/IB +------------------------------------------------------------------ + Bra + ^ + [0-9A-Z_a-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/^[[:^cntrl:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^\x00-\x1f\x7f] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 + 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] + ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81 + \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 + \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f + \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae + \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd + \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[12[:^digit:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^03-9] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; < + = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a + b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/^[[:^blank:]]/IB +------------------------------------------------------------------ + Bra + ^ + [^\x09 ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b + \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a + \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 + : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ + _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 + \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f + \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e + \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad + \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc + \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb + \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda + \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 + \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 + \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + +/[01[:alpha:]%]/IB +------------------------------------------------------------------ + Bra + [%01A-Za-z] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W + X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + +/[[.ch.]]/I +Failed: error 113 at offset 7: POSIX collating elements are not supported + +/[[=ch=]]/I +Failed: error 113 at offset 7: POSIX collating elements are not supported + +/[[:rhubarb:]]/I +Failed: error 130 at offset 12: unknown POSIX class name + +/[[:upper:]]/Ii +Capture group count = 0 +Options: caseless +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + A + 0: A + a + 0: a + +/[[:lower:]]/Ii +Capture group count = 0 +Options: caseless +Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 + A + 0: A + a + 0: a + +/((?-i)[[:lower:]])[[:lower:]]/Ii +Capture 
group count = 1 +Options: caseless +Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 2 + ab + 0: ab + 1: a + aB + 0: aB + 1: a +\= Expect no match + Ab +No match + AB +No match + +/[\200-\110]/I +Failed: error 108 at offset 9: range out of order in character class + +/^(?(0)f|b)oo/I +Failed: error 115 at offset 5: reference to non-existent subpattern + +# This one's here because of the large output vector needed + +/(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(
?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\d+(?:\s|$))(\w+)\s+(\270)/I +Capture 
group count = 271 +Max back reference = 270 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC\=ovector=300 + 0: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 ABC ABC + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 + 9: 9 +10: 10 +11: 11 +12: 12 +13: 13 +14: 14 +15: 15 +16: 16 +17: 17 +18: 18 +19: 19 +20: 20 +21: 21 +22: 22 +23: 23 +24: 24 +25: 25 +26: 26 +27: 27 +28: 28 +29: 29 +30: 30 +31: 31 +32: 32 +33: 33 +34: 34 +35: 35 +36: 36 +37: 37 +38: 38 +39: 39 +40: 40 +41: 41 +42: 42 +43: 43 +44: 44 +45: 45 +46: 46 +47: 47 +48: 48 +49: 49 +50: 50 +51: 51 +52: 52 +53: 53 +54: 54 +55: 55 +56: 56 +57: 57 +58: 58 +59: 59 +60: 60 +61: 61 +62: 62 +63: 63 +64: 64 +65: 65 +66: 66 +67: 67 +68: 68 +69: 69 +70: 70 +71: 71 +72: 72 +73: 73 +74: 74 +75: 75 +76: 76 +77: 77 +78: 78 +79: 79 +80: 80 +81: 81 +82: 82 +83: 83 +84: 84 +85: 85 +86: 86 +87: 87 +88: 88 +89: 89 +90: 90 +91: 91 +92: 92 +93: 93 +94: 94 +95: 95 +96: 96 +97: 97 +98: 98 +99: 99 +100: 100 +101: 101 +102: 102 +103: 103 +104: 104 +105: 105 +106: 106 +107: 107 +108: 108 +109: 109 +110: 110 +111: 111 +112: 112 +113: 113 +114: 114 +115: 115 +116: 116 +117: 117 +118: 118 +119: 119 +120: 120 +121: 121 +122: 122 +123: 123 +124: 124 +125: 125 +126: 126 +127: 127 +128: 128 +129: 129 +130: 130 +131: 131 +132: 132 +133: 133 +134: 134 +135: 135 +136: 136 +137: 137 +138: 138 +139: 139 +140: 140 +141: 141 +142: 142 +143: 143 +144: 144 +145: 145 +146: 146 +147: 147 +148: 148 +149: 149 +150: 150 +151: 151 +152: 152 +153: 153 +154: 154 +155: 155 +156: 156 +157: 157 +158: 158 +159: 159 +160: 160 +161: 161 +162: 162 +163: 163 +164: 164 +165: 165 +166: 166 +167: 167 +168: 168 +169: 169 +170: 170 +171: 171 +172: 172 +173: 173 +174: 174 +175: 175 +176: 176 +177: 177 +178: 178 +179: 179 +180: 180 +181: 181 +182: 182 +183: 183 +184: 184 +185: 185 +186: 186 +187: 187 +188: 188 +189: 189 +190: 190 +191: 191 +192: 192 +193: 193 +194: 194 +195: 195 +196: 196 +197: 197 +198: 198 +199: 199 +200: 200 +201: 201 +202: 202 +203: 203 +204: 204 +205: 205 +206: 206 +207: 207 +208: 208 +209: 209 +210: 210 +211: 211 
+212: 212 +213: 213 +214: 214 +215: 215 +216: 216 +217: 217 +218: 218 +219: 219 +220: 220 +221: 221 +222: 222 +223: 223 +224: 224 +225: 225 +226: 226 +227: 227 +228: 228 +229: 229 +230: 230 +231: 231 +232: 232 +233: 233 +234: 234 +235: 235 +236: 236 +237: 237 +238: 238 +239: 239 +240: 240 +241: 241 +242: 242 +243: 243 +244: 244 +245: 245 +246: 246 +247: 247 +248: 248 +249: 249 +250: 250 +251: 251 +252: 252 +253: 253 +254: 254 +255: 255 +256: 256 +257: 257 +258: 258 +259: 259 +260: 260 +261: 261 +262: 262 +263: 263 +264: 264 +265: 265 +266: 266 +267: 267 +268: 268 +269: 269 +270: ABC +271: ABC + +# This one's here because Perl does this differently and PCRE2 can't at present + +/(main(O)?)+/I +Capture group count = 2 +First code unit = 'm' +Last code unit = 'n' +Subject length lower bound = 4 + mainmain + 0: mainmain + 1: main + mainOmain + 0: mainOmain + 1: main + 2: O + +# These are all cases where Perl does it differently (nested captures) + +/^(a(b)?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 1 + aba + 0: aba + 1: a + 2: b + +/^(aa(bb)?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(aa|aa(bb))+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(aa(bb)??)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: aa + 2: bb + +/^(?:aa(bb)?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + +/^(aa(b(b))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + 
aabbaa + 0: aabbaa + 1: aa + 2: bb + 3: b + +/^(?:aa(b(b))?)+$/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + 2: b + +/^(?:aa(b(?:b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: bb + +/^(?:aa(bb(?:b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bbb + +/^(?:aa(b(?:bb))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bbb + +/^(?:aa(?:b(b))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbaa + 0: aabbaa + 1: b + +/^(?:aa(?:b(bb))?)+$/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: bb + +/^(aa(b(bb))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbaa + 0: aabbbaa + 1: aa + 2: bbb + 3: bb + +/^(aa(bb(bb))?)+$/I +Capture group count = 3 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + aabbbbaa + 0: aabbbbaa + 1: aa + 2: bbbb + 3: bb + +# ---------------- + +/#/IBx +------------------------------------------------------------------ + Bra + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Options: extended +Subject length lower bound = 0 + +/a#/IBx +------------------------------------------------------------------ + Bra + a + Ket + End +------------------------------------------------------------------ +Capture group 
count = 0 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + +/[\s]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[\S]/IB +------------------------------------------------------------------ + Bra + [^\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +Subject length lower bound = 1 + +/a(?i)b/IB +------------------------------------------------------------------ + Bra + a + /i b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' (caseless) +Subject length lower bound = 2 + ab + 0: ab + aB + 0: aB +\= Expect no match + AB +No match + +/(a(?i)b)/IB 
+------------------------------------------------------------------ + Bra + CBra 1 + a + /i b + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' (caseless) +Subject length lower bound = 2 + ab + 0: ab + 1: ab + aB + 0: aB + 1: aB +\= Expect no match + AB +No match + +/ (?i)abc/IBx +------------------------------------------------------------------ + Bra + /i abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/#this is a comment + (?i)abc/IBx +------------------------------------------------------------------ + Bra + /i abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB +------------------------------------------------------------------ + Bra + 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = '1' +Last code unit = '0' +Subject length lower bound = 300 + 
+/\Q123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/IB +------------------------------------------------------------------ + Bra + 123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = '1' +Last code unit = '0' +Subject length lower bound = 300 + +/\Q\E/IB +------------------------------------------------------------------ + Bra + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + \ + 0: + +/\Q\Ex/IB +------------------------------------------------------------------ + Bra + x + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Subject length lower bound = 1 + +/ \Q\E/IB +------------------------------------------------------------------ + Bra + + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = ' ' +Subject length lower bound = 1 + +/a\Q\E/IB +------------------------------------------------------------------ + Bra + a + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + abc + 0: a + bca + 0: a + bac + 0: a + +/a\Q\Eb/IB +------------------------------------------------------------------ + 
Bra + ab + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + abc + 0: ab + +/\Q\Eabc/IB +------------------------------------------------------------------ + Bra + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/x*+\w/IB +------------------------------------------------------------------ + Bra + x*+ + \w + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Subject length lower bound = 1 +\= Expect no match + xxxxx +No match + +/x?+/IB +------------------------------------------------------------------ + Bra + x?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/x++/IB +------------------------------------------------------------------ + Bra + x++ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Subject length lower bound = 1 + +# For comparison with the following test, which disables auto-possessification +# In this regex, x+ should be converted to x++ +/x+y/B,auto_possess +------------------------------------------------------------------ + Bra + x++ + y + Ket + End +------------------------------------------------------------------ + +# In this regex, x+ should not be converted to x++ +/x+y/B,auto_possess_off +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + +# Also in this regex, x+ should 
not be converted to x++ +/x+y/B,optimization_none +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + +# In this one too, x+ should not be converted to x++ +/x+y/B,no_auto_possess +------------------------------------------------------------------ + Bra + x+ + y + Ket + End +------------------------------------------------------------------ + +/x{1,3}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + x + x{0,2}+ + Ket + End +------------------------------------------------------------------ + +/x{1,3}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i x + /i x{0,2}+ + Ket + End +------------------------------------------------------------------ + +/[^x]{1,3}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [^x] (not) + [^x]{0,2}+ (not) + Ket + End +------------------------------------------------------------------ + +/[^x]{1,3}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i [^x] (not) + /i [^x]{0,2}+ (not) + Ket + End +------------------------------------------------------------------ + +/x{1,3}+/IB,auto_possess_off +------------------------------------------------------------------ + Bra + x + x{0,2}+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: dotstar_anchor,start_optimize +First code unit = 'x' +Subject length lower bound = 1 + +/(x)*+/IB +------------------------------------------------------------------ + Bra + Braposzero + CBraPos 1 + x + KetRpos + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + +/^(\w++|\s++)*$/I +Capture group count = 1 +May match empty string +Compile 
options: +Overall options: anchored +Subject length lower bound = 0 + now is the time for all good men to come to the aid of the party + 0: now is the time for all good men to come to the aid of the party + 1: party +\= Expect no match + this is not a line with only words and spaces! +No match + +/(\d++)(\w)/I +Capture group count = 2 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 2 + 12345a + 0: 12345a + 1: 12345 + 2: a +\= Expect no match + 12345+ +No match + +/a++b/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + +/(a++b)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + 1: aaab + +/(a++)b/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaab + 0: aaab + 1: aaa + +/([^()]++|\([^()]*\))+/I +Capture group count = 1 +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( * + , - . / 0 1 2 3 4 5 + 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e + \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d + \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac + \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb + \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca + \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 + \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 + \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 + \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Subject length lower bound = 1 + ((abc(ade)ufh()()x + 0: abc(ade)ufh()()x + 1: x + +/\(([^()]++|\([^()]+\))+\)/I +Capture group count = 1 +First code unit = '(' +Last code unit = ')' +Subject length lower bound = 3 + (abc) + 0: (abc) + 1: abc + (abc(def)xyz) + 0: (abc(def)xyz) + 1: xyz +\= Expect no match + ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +No match + +/(abc){1,3}+/IB +------------------------------------------------------------------ + Bra + Once + CBra 1 + abc + Ket + Brazero + Bra + CBra 1 + abc + Ket + Brazero + CBra 1 + abc + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/a+?+/I +Failed: error 109 at offset 3: quantifier does not follow a repeatable item + +/a{2,3}?+b/I +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/(?U)a+?+/I +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/a{2,3}?+b/I,ungreedy +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/x(?U)a++b/IB 
+------------------------------------------------------------------ + Bra + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + xaaaab + 0: xaaaab + +/(?U)xa++b/IB +------------------------------------------------------------------ + Bra + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + xaaaab + 0: xaaaab + +/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/IB +------------------------------------------------------------------ + Bra + ^ + CBra 1 + CBra 2 + a+ + Ket + CBra 3 + [ab]+? + Ket + CBra 4 + [bc]+ + Ket + CBra 5 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 5 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/^x(?U)a+b/IB +------------------------------------------------------------------ + Bra + ^ + x + a++ + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + +/^x(?U)(a+)b/IB +------------------------------------------------------------------ + Bra + ^ + x + CBra 1 + a+? 
+ Ket + b + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'x' +Last code unit = 'b' +Subject length lower bound = 3 + +/[.x.]/I +Failed: error 113 at offset 0: POSIX collating elements are not supported + +/[=x=]/I +Failed: error 113 at offset 0: POSIX collating elements are not supported + +/[:x:]/I +Failed: error 112 at offset 0: POSIX named classes are supported only within a class + +/\F/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\l/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\L/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{name}/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\u/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\U/I +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{4}/ + abcdefg + 0: abcd + +/\N{,}/ +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/\N{25,ab}/ +Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/[\N]/ +Failed: error 171 at offset 3: \N is not supported in a class + +/[\N{4}]/ +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/[\N{name}]/ +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + +/a{1,3}b/ungreedy + ab + 0: ab + +/[/I +Failed: error 106 at offset 1: missing terminating ] for character class + +/[a-/I +Failed: error 106 at offset 3: missing terminating ] for character class + +/[[:space:]/I +Failed: error 106 at offset 10: missing terminating ] for character class + +/[\s]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d ] + Ket + 
End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[[:space:]]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d ] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 +Subject length lower bound = 1 + +/[[:space:]abcde]/IB +------------------------------------------------------------------ + Bra + [\x09-\x0d a-e] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b c d e +Subject length lower bound = 1 + +/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/Ix +Capture group count = 0 +Options: extended +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + <> + 0: <> + + 0: + hij> + 0: hij> + hij> + 0: + def> + 0: def> + + 0: <> +\= Expect no match + iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB +------------------------------------------------------------------ + Bra + 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X 
+ \b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Max lookbehind = 1 +First code unit = '8' +Last code unit = 'X' +Subject length lower bound = 409 + +/\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b/IB +------------------------------------------------------------------ + Bra + $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDDqmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X + \b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Max lookbehind = 1 +First code unit = '$' +Last code unit = 'X' +Subject length lower bound = 404 + +/(.*)\d+\1/I +Capture group count = 1 +Max back reference = 1 +Subject length lower bound = 1 + +/(.*)\d+/I +Capture group count = 1 +First code unit at start or follows newline +Subject length lower bound = 1 + +/(.*)\d+\1/Is +Capture group count = 1 +Max back reference = 1 +Options: dotall +Subject length lower bound = 1 + +/(.*)\d+/Is +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/(.*(xyz))\d+\2/I +Capture group count = 2 +Max back reference = 2 +First code unit at start or follows newline +Last code unit = 'z' 
+Subject length lower bound = 7 + +/((.*))\d+\1/I +Capture group count = 2 +Max back reference = 1 +Subject length lower bound = 1 + abc123bc + 0: bc123bc + 1: bc + 2: bc + +/a[b]/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?=a).*/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(?=abc).xyz/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Last code unit = 'z' (caseless) +Subject length lower bound = 4 + +/(?=abc)(?i).xyz/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'z' (caseless) +Subject length lower bound = 4 + +/(?=a)(?=b)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(?=.)a/I +Capture group count = 0 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=abcda)a)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 2 + +/((?=abcda)ab)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/()a/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/(?:(?=.)|(?abcdef + 0 ^ ^ d + 0: abcdef + 1234abcdef +--->1234abcdef + 0 ^ ^ d + 0: abcdef +\= Expect no match + abcxyz +No match + abcxyzf +--->abcxyzf + 0 ^ ^ d +No match + +/abc(?C)de(?C1)f/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + 123abcdef +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f + 0: abcdef + +/(?C1)\dabc(?C2)def/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'f' +Subject length lower bound = 7 + 1234abcdef +--->1234abcdef + 1 ^ \d + 1 ^ \d + 1 ^ \d + 1 ^ \d + 2 ^ ^ d + 0: 4abcdef +\= Expect no match + abcdef +No match + +/(?C1)\dabc(?C2)def/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'f' 
+Subject length lower bound = 7 + 1234abcdef +--->1234abcdef + 1 ^ \d + 1 ^ \d + 1 ^ \d + 1 ^ \d + 2 ^ ^ d + 0: 4abcdef +\= Expect no match + abcdef +No match + +/(?C255)ab/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?C256)ab/I +Failed: error 138 at offset 6: number after (?C is greater than 255 + +/(?Cab)xx/I +Failed: error 182 at offset 3: unrecognized string delimiter follows (?C + +/(?C12vr)x/I +Failed: error 139 at offset 5: closing parenthesis for (?C expected + +/abc(?C)def/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + \x83\x0\x61bcdef +--->\x83\x00abcdef + 0 ^ ^ d + 0: abcdef + +/(abc)(?C)de(?C1)f/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + 123abcdef +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f + 0: abcdef + 1: abc + 123abcdef\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->123abcdef + ^ ^ d +Callout 1: last capture = 1 + 1: abc +--->123abcdef + ^ ^ f + 0: abcdef + 1: abc + 123abcdefC-\=callout_none + 0: abcdef + 1: abc +\= Expect no match + 123abcdef\=callout_fail=1 +--->123abcdef + 0 ^ ^ d + 1 ^ ^ f +No match + +/(?C0)(abc(?C1))*/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + abcabcabc +--->abcabcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabcabc + 1: abc + abcabc\=callout_fail=1:4 +--->abcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabc + 1: abc + abcabcabc\=callout_fail=1:4 +--->abcabcabc + 0 ^ ( + 1 ^ ^ )* + 1 ^ ^ )* + 1 ^ ^ )* + 0: abcabc + 1: abc + +/(\d{3}(?C))*/I +Capture group count = 1 +May match empty string +Subject length lower bound = 0 + 123\=callout_capture +Callout 0: last capture = 0 +--->123 + ^ ^ )* + 0: 123 + 1: 123 + 123456\=callout_capture +Callout 0: last capture = 0 +--->123456 + ^ ^ )* +Callout 0: last capture = 1 + 1: 123 +--->123456 + ^ ^ )* + 0: 123456 + 1: 456 + 123456789\=callout_capture +Callout 
0: last capture = 0 +--->123456789 + ^ ^ )* +Callout 0: last capture = 1 + 1: 123 +--->123456789 + ^ ^ )* +Callout 0: last capture = 1 + 1: 456 +--->123456789 + ^ ^ )* + 0: 123456789 + 1: 789 + +/((xyz)(?C)p|(?C1)xyzabc)/I +Capture group count = 2 +First code unit = 'x' +Subject length lower bound = 4 + xyzabc\=callout_capture +Callout 0: last capture = 2 + 1: + 2: xyz +--->xyzabc + ^ ^ p +Callout 1: last capture = 0 +--->xyzabc + ^ x + 0: xyzabc + 1: xyzabc + +/(X)((xyz)(?C)p|(?C1)xyzabc)/I +Capture group count = 3 +First code unit = 'X' +Last code unit = 'x' +Subject length lower bound = 5 + Xxyzabc\=callout_capture +Callout 0: last capture = 3 + 1: X + 2: + 3: xyz +--->Xxyzabc + ^ ^ p +Callout 1: last capture = 1 + 1: X +--->Xxyzabc + ^^ x + 0: Xxyzabc + 1: X + 2: xyzabc + +/(?=(abc))(?C)abcdef/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + abcdef\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->abcdef + ^ a + 0: abcdef + 1: abc + +/(?!(abc)(?C1)d)(?C2)abcxyz/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'z' +Subject length lower bound = 6 + abcxyz\=callout_capture +Callout 1: last capture = 1 + 1: abc +--->abcxyz + ^ ^ d +Callout 2: last capture = 0 +--->abcxyz + ^ a + 0: abcxyz + +/(?<=(abc)(?C))xyz/I +Capture group count = 1 +Max lookbehind = 3 +First code unit = 'x' +Last code unit = 'z' +Subject length lower bound = 3 + abcxyz\=callout_capture +Callout 0: last capture = 1 + 1: abc +--->abcxyz + ^ ) + 0: xyz + 1: abc + +/a(b+)(c*)(?C1)/I +Capture group count = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 +\= Expect no match + abbbbbccc\=callout_data=1 +--->abbbbbccc + 1 ^ ^ End of pattern +Callout data = 1 +No match + +/a(b+?)(c*?)(?C1)/I +Capture group count = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 +\= Expect no match + abbbbbccc\=callout_data=1 +--->abbbbbccc + 1 ^ ^ End of pattern +Callout 
data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 + 1 ^ ^ End of pattern +Callout data = 1 +No match + +/(?C)abc/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/(?C)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(?C)a|b/I +Capture group count = 0 +Starting code units: a b +Subject length lower bound = 1 + +/a|(b)(?C)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 1 + b +--->b + 0 ^^ End of pattern + 0: b + 1: b + +/x(ab|(bc|(de|(?R))))/I +Capture group count = 3 +First code unit = 'x' +Subject length lower bound = 3 + xab + 0: xab + 1: ab + xbc + 0: xbc + 1: bc + 2: bc + xde + 0: xde + 1: de + 2: de + 3: de + xxab + 0: xxab + 1: xab + 2: xab + 3: xab + xxxab + 0: xxxab + 1: xxab + 2: xxab + 3: xxab +\= Expect no match + xyab +No match + +/^([^()]|\((?1)*\))*$/I +Capture group count = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + abc + 0: abc + 1: c + a(b)c + 0: a(b)c + 1: c + a(b(c))d + 0: a(b(c))d + 1: d +\= Expect no match) + a(b(c)d +No match + +/^>abc>([^()]|\((?1)*\))* +Overall options: anchored +First code unit = '>' +Last code unit = '<' +Subject length lower bound = 10 + >abc>123abc>123abc>1(2)3abc>1(2)3abc>(1(2)3)abc>(1(2)3) +Overall options: anchored +Starting code units: ( - 0 1 2 3 4 5 6 7 8 9 +Subject length lower bound = 1 + 12 + 0: 12 + 1: 12 + (((2+2)*-3)-7) + 0: (((2+2)*-3)-7) + 1: (((2+2)*-3)-7) + 2: - + -12 + 0: -12 + 1: -12 +\= Expect no match + ((2+2)*-3)-7) +No match + +/^(x(y|(?1){2})z)/I +Capture group count = 2 +Compile options: +Overall options: anchored +First code unit = 'x' +Subject length lower 
bound = 3 + xyz + 0: xyz + 1: xyz + 2: y + xxyzxyzz + 0: xxyzxyzz + 1: xxyzxyzz + 2: xyzxyz +\= Expect no match + xxyzz +No match + xxyzxyzxyzz +No match + +/((< (?: (?(R) \d++ | [^<>]*+) | (?2)) * >))/Ix +Capture group count = 2 +Options: extended +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 2 + <> + 0: <> + 1: <> + 2: <> + + 0: + 1: + 2: + hij> + 0: hij> + 1: hij> + 2: hij> + hij> + 0: + 1: + 2: + def> + 0: def> + 1: def> + 2: def> + + 0: <> + 1: <> + 2: <> +\= Expect no match + +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 9 + abcdefabc + 0: abcdefabc + 1: abc + +/^(a|b|c)=(?1)+/I +Capture group count = 1 +Compile options: +Overall options: anchored +Starting code units: a b c +Subject length lower bound = 2 + a=a + 0: a=a + 1: a + a=b + 0: a=b + 1: a + a=bc + 0: a=bc + 1: a + +/^(a|b|c)=((?1))+/I +Capture group count = 2 +Compile options: +Overall options: anchored +Starting code units: a b c +Subject length lower bound = 2 + a=a + 0: a=a + 1: a + 2: a + a=b + 0: a=b + 1: a + 2: b + a=bc + 0: a=bc + 1: a + 2: c + +/a(?Pb|c)d(?Pe)/IB +------------------------------------------------------------------ + Bra + a + CBra 1 + b + Alt + c + Ket + d + CBra 2 + e + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + longername2 2 + name1 1 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 4 + abde + 0: abde + 1: b + 2: e + acde + 0: acde + 1: c + 2: e + +/(?:a(?Pc(?Pd)))(?Pa)/IB +------------------------------------------------------------------ + Bra + Bra + a + CBra 1 + c + CBra 2 + d + Ket + Ket + Ket + CBra 3 + a + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 3 +Named capture groups: + a 3 + c 1 + d 2 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 4 + +/(?Pa)...(?P=a)bbb(?P>a)d/IB 
+------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Any + Any + Any + \1 + bbb + Recurse + d + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Max back reference = 1 +Named capture groups: + a 1 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 10 + +/^\W*(?:(?P(?P.)\W*(?P>one)\W*(?P=two)|)|(?P(?P.)\W*(?P>three)\W*(?P=four)|\W*.\W*))\W*$/Ii +Capture group count = 4 +Max back reference = 4 +Named capture groups: + four 4 + one 1 + three 3 + two 2 +May match empty string +Compile options: caseless +Overall options: anchored caseless +Subject length lower bound = 0 + 1221 + 0: 1221 + 1: 1221 + 2: 1 + Satan, oscillate my metallic sonatas! + 0: Satan, oscillate my metallic sonatas! + 1: + 2: + 3: Satan, oscillate my metallic sonatas + 4: S + A man, a plan, a canal: Panama! + 0: A man, a plan, a canal: Panama! + 1: + 2: + 3: A man, a plan, a canal: Panama + 4: A + Able was I ere I saw Elba. + 0: Able was I ere I saw Elba. 
+ 1: + 2: + 3: Able was I ere I saw Elba + 4: A +\= Expect no match + The quick brown fox +No match + +/((?(R)a|b))\1(?1)?/I +Capture group count = 1 +Max back reference = 1 +Subject length lower bound = 2 + bb + 0: bb + 1: b + bbaa + 0: bba + 1: b + +/(.*)a/Is +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'a' +Subject length lower bound = 1 + +/(.*)a\1/Is +Capture group count = 1 +Max back reference = 1 +Options: dotall +Last code unit = 'a' +Subject length lower bound = 1 + +/(.*)a(b)\2/Is +Capture group count = 2 +Max back reference = 2 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'b' +Subject length lower bound = 3 + +/((.*)a|(.*)b)z/Is +Capture group count = 3 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/((.*)a|(.*)b)z\1/Is +Capture group count = 3 +Max back reference = 1 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 3 + +/((.*)a|(.*)b)z\2/Is +Capture group count = 3 +Max back reference = 2 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/((.*)a|(.*)b)z\3/Is +Capture group count = 3 +Max back reference = 3 +Options: dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/((.*)a|^(.*)b)z\3/Is +Capture group count = 3 +Max back reference = 3 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'z' +Subject length lower bound = 2 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is +Capture group count = 31 +May match empty string +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 0 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is +Capture group count = 31 +Max back 
reference = 31 +May match empty string +Options: dotall +Subject length lower bound = 0 + +/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is +Capture group count = 32 +Max back reference = 32 +May match empty string +Options: dotall +Subject length lower bound = 0 + +/(a)(bc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + +/(?Pa)(bc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Bra + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Named capture groups: + one 1 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + abc + 0: abc + 1: a + +/(a)(?Pbc)/IB,no_auto_capture +------------------------------------------------------------------ + Bra + Bra + a + Ket + CBra 1 + bc + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Named capture groups: + named 1 +Options: no_auto_capture +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/(aaa(?C1)bbb|ab)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + aaabbb +--->aaabbb + 1 ^ ^ b + 0: aaabbb + 1: aaabbb + aaabbb\=callout_data=0 +--->aaabbb + 1 ^ ^ b + 0: aaabbb + 1: aaabbb + aaabbb\=callout_data=1 +--->aaabbb + 1 ^ ^ b +Callout data = 1 + 0: ab + 1: ab +\= Expect no match + aaabbb\=callout_data=-1 +--->aaabbb + 1 ^ ^ b +Callout data = -1 +No match + +/ab(?Pcd)ef(?Pgh)/I +Capture group 
count = 2 +Named capture groups: + one 1 + two 2 +First code unit = 'a' +Last code unit = 'h' +Subject length lower bound = 8 + abcdefgh + 0: abcdefgh + 1: cd + 2: gh + abcdefgh\=copy=1,get=two + 0: abcdefgh + 1: cd + 2: gh + 1C cd (2) + G gh (2) two (group 2) + abcdefgh\=copy=one,copy=two + 0: abcdefgh + 1: cd + 2: gh + C cd (2) one (group 1) + C gh (2) two (group 2) + abcdefgh\=copy=three + 0: abcdefgh + 1: cd + 2: gh +Number not found for group "three" +Copy substring "three" failed (-49): unknown substring + +/(?P)(?P)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + Tes 1 + Test 2 +May match empty string +Subject length lower bound = 0 + +/(?P)(?P)/IB +------------------------------------------------------------------ + Bra + CBra 1 + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +Named capture groups: + Tes 2 + Test 1 +May match empty string +Subject length lower bound = 0 + +/(?Pzz)(?Paa)/I +Capture group count = 2 +Named capture groups: + A 2 + Z 1 +First code unit = 'z' +Last code unit = 'a' +Subject length lower bound = 4 + zzaa\=copy=Z + 0: zzaa + 1: zz + 2: aa + C zz (2) Z (group 1) + zzaa\=copy=A + 0: zzaa + 1: zz + 2: aa + C aa (2) A (group 2) + +/(?Peks)(?Peccs)/I +Failed: error 143 at offset 16: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +/(?Pabc(?Pdef)(?Pxyz))/I +Failed: error 143 at offset 31: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +"\[((?P\d+)(,(?P>elem))*)\]"I +Capture group count = 3 +Named capture groups: + elem 2 +First code unit = '[' +Last code unit = ']' +Subject length lower bound = 3 + [10,20,30,5,5,4,4,2,43,23,4234] + 0: [10,20,30,5,5,4,4,2,43,23,4234] + 1: 10,20,30,5,5,4,4,2,43,23,4234 + 2: 10 + 3: ,4234 +\= Expect no 
match + [] +No match + +"\[((?P\d+)(,(?P>elem))*)?\]"I +Capture group count = 3 +Named capture groups: + elem 2 +First code unit = '[' +Last code unit = ']' +Subject length lower bound = 2 + [10,20,30,5,5,4,4,2,43,23,4234] + 0: [10,20,30,5,5,4,4,2,43,23,4234] + 1: 10,20,30,5,5,4,4,2,43,23,4234 + 2: 10 + 3: ,4234 + [] + 0: [] + +/(a(b(?2)c))?/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(b(?2)c))*/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + KetRmax + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(b(?2)c)){0,2}/IB +------------------------------------------------------------------ + Bra + Brazero + Bra + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + Ket + Brazero + CBra 1 + a + CBra 2 + b + Recurse + c + Ket + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/[ab]{1}+/B +------------------------------------------------------------------ + Bra + [ab] + Ket + End +------------------------------------------------------------------ + +/()(?1){1}/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse + Ket + End +------------------------------------------------------------------ + +/()(?1)/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Recurse + Ket + End +------------------------------------------------------------------ + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii +Capture group 
count = 3 +Options: caseless +Last code unit = 'g' (caseless) +Subject length lower bound = 8 + Baby Bjorn Active Carrier - With free SHIPPING!! + 0: Baby Bjorn Active Carrier - With free SHIPPING!! + 1: Baby Bjorn Active Carrier - With free SHIPPING!! + +/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii +Capture group count = 3 +Options: caseless +Last code unit = 'g' (caseless) +Subject length lower bound = 8 + Baby Bjorn Active Carrier - With free SHIPPING!! + 0: Baby Bjorn Active Carrier - With free SHIPPING!! + 1: Baby Bjorn Active Carrier - With free SHIPPING!! + +/a*.*b/IB +------------------------------------------------------------------ + Bra + a* + Any* + b + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 1 + +/(a|b)*.?c/IB +------------------------------------------------------------------ + Bra + Brazero + CBra 1 + a + Alt + b + KetRmax + Any? + c + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Last code unit = 'c' +Subject length lower bound = 1 + +/abc(?C255)de(?C)f/IB +------------------------------------------------------------------ + Bra + abc + Callout 255 10 1 + de + Callout 0 16 1 + f + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' +Last code unit = 'f' +Subject length lower bound = 6 + +/abcde/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + a + Callout 255 1 1 + b + Callout 255 2 1 + c + Callout 255 3 1 + d + Callout 255 4 1 + e + Callout 255 5 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + abcde +--->abcde + +0 ^ a + +1 ^^ b + +2 ^ ^ c + +3 ^ ^ d + +4 ^ ^ e + +5 ^ ^ End of 
pattern + 0: abcde +\= Expect no match + abcdfe +--->abcdfe + +0 ^ a + +1 ^^ b + +2 ^ ^ c + +3 ^ ^ d + +4 ^ ^ e +No match + +/a*b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a*+ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 1 + ab +--->ab + +0 ^ a* + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a* + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab + aaaacb +--->aaaacb + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^^ b + +0 ^ a* + +2 ^ b + +3 ^^ End of pattern + 0: b + +/a*b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a*+ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 1 + ab +--->ab + +0 ^ a* + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a* + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab + aaaacb +--->aaaacb + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^ ^ b + +0 ^ a* + +2 ^^ b + +0 ^ a* + +2 ^ b + +3 ^^ End of pattern + 0: b + +/a+b/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 2 + a++ + Callout 255 2 1 + b + Callout 255 3 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: auto_callout +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + ab +--->ab + +0 ^ a+ + +2 ^^ b + +3 ^ ^ End of pattern + 0: ab + aaaab +--->aaaab + +0 ^ a+ + +2 ^ ^ b + +3 ^ ^ End of pattern + 0: aaaab +\= Expect no match + aaaacb +--->aaaacb + 
+0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^ ^ b + +0 ^ a+ + +2 ^^ b +No match + +/(abc|def)x/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 1 + a + Callout 255 2 1 + b + Callout 255 3 1 + c + Callout 255 4 1 + Alt + Callout 255 5 1 + d + Callout 255 6 1 + e + Callout 255 7 1 + f + Callout 255 8 1 + Ket + Callout 255 9 1 + x + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a d +Last code unit = 'x' +Subject length lower bound = 4 + abcx +--->abcx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: abcx + 1: abc + defx +--->defx + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: defx + 1: def +\= Expect no match + abcdefzx +--->abcdefzx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x + +5 ^ d + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x +No match + +/(abc|def)x/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 1 + a + Callout 255 2 1 + b + Callout 255 3 1 + c + Callout 255 4 1 + Alt + Callout 255 5 1 + d + Callout 255 6 1 + e + Callout 255 7 1 + f + Callout 255 8 1 + Ket + Callout 255 9 1 + x + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a d +Last code unit = 'x' +Subject length lower bound = 4 + abcx +--->abcx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 ^ ^ | + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: abcx + 1: abc + defx +--->defx + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x ++10 ^ ^ End of pattern + 0: defx + 1: def +\= Expect no match + abcdefzx +--->abcdefzx + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ c + +4 
^ ^ | + +9 ^ ^ x + +5 ^ d + +0 ^ ( + +1 ^ a + +5 ^ d + +6 ^^ e + +7 ^ ^ f + +8 ^ ^ ) + +9 ^ ^ x +No match + +/(ab|cd){3,4}/I,auto_callout +Capture group count = 1 +Options: auto_callout +Starting code units: a c +Subject length lower bound = 6 + ababab +--->ababab + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c ++12 ^ ^ End of pattern + 0: ababab + 1: ab + abcdabcd +--->abcdabcd + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +2 ^ ^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdabcd + 1: cd + abcdcdcdcdcd +--->abcdcdcdcdcd + +0 ^ ( + +1 ^ a + +2 ^^ b + +3 ^ ^ | + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} + +1 ^ ^ a + +4 ^ ^ c + +5 ^ ^ d + +6 ^ ^ ){3,4} ++12 ^ ^ End of pattern + 0: abcdcdcd + 1: cd + +/([ab]{,}c|xy)/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 4 + [ab] + Callout 255 5 1 + { + Callout 255 6 1 + , + Callout 255 7 1 + } + Callout 255 8 1 + c + Callout 255 9 1 + Alt + Callout 255 10 1 + x + Callout 255 11 1 + y + Callout 255 12 1 + Ket + Callout 255 13 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Subject length lower bound = 2 +\= Expect no match + Note: that {,} does NOT introduce a quantifier +--->Note: that {,} does NOT introduce a quantifier + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x +No match + +/([ab]{,}c|xy)/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 4 + [ab] + Callout 255 5 1 + { + Callout 255 6 1 + , + Callout 255 7 1 + } + Callout 255 8 1 + c + 
Callout 255 9 1 + Alt + Callout 255 10 1 + x + Callout 255 11 1 + y + Callout 255 12 1 + Ket + Callout 255 13 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Subject length lower bound = 2 +\= Expect no match + Note: that {,} does NOT introduce a quantifier +--->Note: that {,} does NOT introduce a quantifier + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x + +0 ^ ( + +1 ^ [ab] + +5 ^^ { ++10 ^ x +No match + +/([ab]{1,4}c|xy){4,5}?123/IB,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 1 + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + Braminzero + CBra 1 + Callout 255 1 9 + [ab]{1,4}+ + Callout 255 10 1 + c + Callout 255 11 1 + Alt + Callout 255 12 1 + x + Callout 255 13 1 + y + Callout 255 14 7 + Ket + Callout 255 21 1 + 1 + Callout 255 22 1 + 2 + Callout 255 23 1 + 3 + Callout 255 24 0 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +Options: auto_callout +Starting code units: a b x +Last code unit = '3' +Subject length lower bound = 11 + aacaacaacaacaac123 +--->aacaacaacaacaac123 + +0 ^ ( + +1 ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c 
++11 ^ ^ | ++21 ^ ^ 1 + +1 ^ ^ [ab]{1,4} ++10 ^ ^ c ++11 ^ ^ | ++21 ^ ^ 1 ++22 ^ ^ 2 ++23 ^ ^ 3 ++24 ^ ^ End of pattern + 0: aacaacaacaacaac123 + 1: aac + +/\b.*/I +Capture group count = 0 +Max lookbehind = 1 +May match empty string +Subject length lower bound = 0 + ab cd\=offset=1 + 0: cd + +/\b.*/Is +Capture group count = 0 +Max lookbehind = 1 +May match empty string +Options: dotall +Subject length lower bound = 0 + ab cd\=startoffset=1 + 0: cd + +/(?!.bcd).*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + Xbcd12345 + 0: bcd12345 + +/abcde/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'e' +Subject length lower bound = 5 + ab\=ps +Partial match: ab + abc\=ps +Partial match: abc + abcd\=ps +Partial match: abcd + abcde\=ps + 0: abcde + the quick brown abc\=ps +Partial match: abc +\= Expect no match\=ps + the quick brown abxyz fox\=ps +No match + +"^(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[012])/(20)?\d\d$"I +Capture group count = 3 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = '/' +Subject length lower bound = 6 + 13/05/04\=ps + 0: 13/05/04 + 1: 13 + 2: 05 + 13/5/2004\=ps + 0: 13/5/2004 + 1: 13 + 2: 5 + 3: 20 + 02/05/09\=ps + 0: 02/05/09 + 1: 02 + 2: 05 + 1\=ps +Partial match: 1 + 1/2\=ps +Partial match: 1/2 + 1/2/0\=ps +Partial match: 1/2/0 + 1/2/04\=ps + 0: 1/2/04 + 1: 1 + 2: 2 + 0\=ps +Partial match: 0 + 02/\=ps +Partial match: 02/ + 02/0\=ps +Partial match: 02/0 + 02/1\=ps +Partial match: 02/1 +\= Expect no match\=ps + \=ps +No match + 123\=ps +No match + 33/4/04\=ps +No match + 3/13/04\=ps +No match + 0/1/2003\=ps +No match + 0/\=ps +No match + 02/0/\=ps +No match + 02/13\=ps +No match + +/0{0,2}ABC/I +Capture group count = 0 +Starting code units: 0 A +Last code unit = 'C' +Subject length lower bound = 3 + +/\d{3,}ABC/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'C' +Subject length lower bound = 6 + 
+/\d*ABC/I +Capture group count = 0 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A +Last code unit = 'C' +Subject length lower bound = 3 + +/[abc]+DE/I +Capture group count = 0 +Starting code units: a b c +Last code unit = 'E' +Subject length lower bound = 3 + +/[abc]?123/I +Capture group count = 0 +Starting code units: 1 a b c +Last code unit = '3' +Subject length lower bound = 3 + 123\=ps + 0: 123 + a\=ps +Partial match: a + b\=ps +Partial match: b + c\=ps +Partial match: c + c12\=ps +Partial match: c12 + c123\=ps + 0: c123 + +/^(?:\d){3,5}X/I +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = 'X' +Subject length lower bound = 4 + 1\=ps +Partial match: 1 + 123\=ps +Partial match: 123 + 123X + 0: 123X + 1234\=ps +Partial match: 1234 + 1234X + 0: 1234X + 12345\=ps +Partial match: 12345 + 12345X + 0: 12345X +\= Expect no match + 1X +No match + 123456\=ps +No match + +"<(\w+)/?>(.)*"Igms +Capture group count = 3 +Max back reference = 1 +Options: dotall multiline +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 7 + \n\n\nPartner der LCO\nde\nPartner der LINEAS Consulting\nGmbH\nLINEAS Consulting GmbH Hamburg\nPartnerfirmen\n30 days\nindex,follow\n\nja\n3\nPartner\n\n\nLCO\nLINEAS Consulting\n15.10.2003\n\n\n\n\nDie Partnerfirmen der LINEAS Consulting\nGmbH\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\=jitstack=1024 + 0: \x0a\x0aPartner der LCO\x0ade\x0aPartner der LINEAS Consulting\x0aGmbH\x0aLINEAS Consulting GmbH Hamburg\x0aPartnerfirmen\x0a30 days\x0aindex,follow\x0a\x0aja\x0a3\x0aPartner\x0a\x0a\x0aLCO\x0aLINEAS Consulting\x0a15.10.2003\x0a\x0a\x0a\x0a\x0aDie Partnerfirmen der LINEAS Consulting\x0aGmbH\x0a\x0a\x0a \x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a\x0a + 1: seite + 2: \x0a + 3: seite + +/line\nbreak/I +Capture group count = 0 +Contains explicit CR or LF match +First code unit = 'l' +Last code unit = 'k' 
+Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak + line one\nthis is a line\nbreak in the second line + 0: line\x0abreak + +/line\nbreak/I,firstline +Capture group count = 0 +Contains explicit CR or LF match +Options: firstline +First code unit = 'l' +Last code unit = 'k' +Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/line\nbreak/Im,firstline +Capture group count = 0 +Contains explicit CR or LF match +Options: firstline multiline +First code unit = 'l' +Last code unit = 'k' +Subject length lower bound = 10 + this is a line\nbreak + 0: line\x0abreak +\= Expect no match + line one\nthis is a line\nbreak in the second line +No match + +/(?i)(?-i)AbCd/I +Capture group count = 0 +First code unit = 'A' +Last code unit = 'd' +Subject length lower bound = 4 + AbCd + 0: AbCd +\= Expect no match + abcd +No match + +/a{11111111111111111111}/I +Failed: error 105 at offset 22: number too big in {} quantifier + +/(){64294967295}/I +Failed: error 105 at offset 14: number too big in {} quantifier + +/(){2,4294967295}/I +Failed: error 105 at offset 15: number too big in {} quantifier + +"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' (caseless) +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: k + +"(?Pa)(?Pb)(?Pc)(?Pd)(?Pe)(?Pf)(?Pg)(?Ph)(?Pi)(?Pj)(?Pk)(?Pl)A\11B"I +Capture group count = 12 +Max back reference = 11 +Named capture groups: + n0 1 + n1 2 + n10 11 + n11 12 + n2 3 + n3 4 + n4 5 + n5 6 + n6 7 + n7 8 + n8 9 + n9 10 +First code unit = 'a' +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: l + +"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"I +Capture group count = 
12 +Max back reference = 11 +First code unit = 'a' +Last code unit = 'B' +Subject length lower bound = 15 + abcdefghijklAkB + 0: abcdefghijklAkB + 1: a + 2: b + 3: c + 4: d + 5: e + 6: f + 7: g + 8: h + 9: i +10: j +11: k +12: l + +"(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)(?Pa)"I +Capture group count = 101 +Named capture groups: + name0 1 + name1 2 + name10 11 + name100 101 + name11 12 + name12 13 + name13 14 + name14 15 + name15 16 + name16 17 + name17 18 + name18 19 + name19 20 + name2 3 + name20 21 + name21 22 + name22 23 + name23 24 + name24 25 + name25 26 + name26 27 + name27 28 + name28 29 + name29 30 + name3 4 + name30 31 + name31 32 + name32 33 + name33 34 + name34 35 + name35 36 + name36 37 + name37 38 + name38 39 + name39 40 + name4 5 + name40 41 + name41 42 + name42 43 + name43 44 + name44 45 + name45 46 + name46 47 + name47 48 + name48 49 + name49 50 + name5 6 + name50 51 + name51 52 + name52 53 + name53 54 + name54 55 + name55 56 + name56 57 + name57 58 + name58 59 + name59 60 + name6 7 + name60 61 + name61 62 + name62 63 + name63 64 + name64 65 + name65 66 + name66 67 + name67 68 + name68 69 + name69 70 + name7 8 + name70 71 + name71 72 + name72 73 + name73 74 + name74 75 + name75 76 + name76 77 + name77 78 + name78 79 + name79 80 + name8 9 + name80 81 + name81 82 + name82 83 + name83 84 + name84 85 + name85 86 + name86 87 + name87 88 + name88 89 + name89 90 + name9 10 + name90 91 + name91 92 + name92 93 + name93 94 + name94 95 + name95 96 + name96 97 + name97 98 + name98 99 + name99 100 +First code 
unit = 'a' +Last code unit = 'a' +Subject length lower bound = 101 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"I +Capture group count = 101 +First code unit = 'a' +Last code unit = 'a' +Subject length lower bound = 101 + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +Matched, but too many substrings + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +/[^()]*(?:\((?R)\)[^()]*)*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + (this(and)that)stuff + 0: (this(and)that)stuff + +/[^()]*(?:\((?>(?R))\)[^()]*)*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + +/[^()]*(?:\((?R)\))*[^()]*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: (this(and)that) + +/(?:\((?R)\))*[^()]*/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: + ((this)) + 0: ((this)) + +/(?:\((?R)\))|[^()]*/I +Capture group count = 0 +May match empty string 
+Subject length lower bound = 0 + (this(and)that + 0: + (this(and)that) + 0: + (this) + 0: (this) + ((this)) + 0: ((this)) + +/\x{0000ff}/I +Capture group count = 0 +First code unit = \xff +Subject length lower bound = 1 + +/^((?Pa1)|(?Pa2)b)/I +Failed: error 143 at offset 18: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +/^((?Pa1)|(?Pa2)b)/I,dupnames +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + a1b\=copy=A + 0: a1 + 1: a1 + 2: a1 + C a1 (2) A (non-unique) + a2b\=copy=A + 0: a2b + 1: a2b + 2: + 3: a2 + C a2 (2) A (non-unique) + a1b\=copy=Z,copy=A + 0: a1 + 1: a1 + 2: a1 +Number not found for group "Z" +Copy substring "Z" failed (-49): unknown substring + C a1 (2) A (non-unique) + +/(?|(?)(?)(?)|(?)(?)(?))/I,dupnames +Capture group count = 3 +Named capture groups: + a 1 + a 3 + b 2 +May match empty string +Options: dupnames +Subject length lower bound = 0 + +/^(?Pa)(?Pb)/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + ab\=copy=A + 0: ab + 1: a + 2: b + C a (1) A (non-unique) + +/^(?Pa)(?Pb)|cd/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + ab\=copy=A + 0: ab + 1: a + 2: b + C a (1) A (non-unique) + cd\=copy=A + 0: cd +Copy substring "A" failed (-55): requested value is not set + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames +Capture group count = 4 +Named capture groups: + A 1 + A 2 + A 3 + A 4 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + cdefgh\=copy=A + 0: cdefgh + 1: + 2: + 3: ef + 4: gh + C ef (2) A (non-unique) + +/^((?Pa1)|(?Pa2)b)/I,dupnames +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: dupnames +Overall 
options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + a1b\=get=A + 0: a1 + 1: a1 + 2: a1 + G a1 (2) A (non-unique) + a2b\=get=A + 0: a2b + 1: a2b + 2: + 3: a2 + G a2 (2) A (non-unique) + a1b\=get=Z,get=A + 0: a1 + 1: a1 + 2: a1 +Number not found for group "Z" +Get substring "Z" failed (-49): unknown substring + G a1 (2) A (non-unique) + +/^(?Pa)(?Pb)/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Compile options: dupnames +Overall options: anchored dupnames +First code unit = 'a' +Subject length lower bound = 2 + ab\=get=A + 0: ab + 1: a + 2: b + G a (1) A (non-unique) + +/^(?Pa)(?Pb)|cd/I,dupnames +Capture group count = 2 +Named capture groups: + A 1 + A 2 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + ab\=get=A + 0: ab + 1: a + 2: b + G a (1) A (non-unique) + cd\=get=A + 0: cd +Get substring "A" failed (-55): requested value is not set + +/^(?Pa)(?Pb)|cd(?Pef)(?Pgh)/I,dupnames +Capture group count = 4 +Named capture groups: + A 1 + A 2 + A 3 + A 4 +Options: dupnames +Starting code units: a c +Subject length lower bound = 2 + cdefgh\=get=A + 0: cdefgh + 1: + 2: + 3: ef + 4: gh + G ef (2) A (non-unique) + +/(?J)^((?Pa1)|(?Pa2)b)/I +Capture group count = 3 +Named capture groups: + A 2 + A 3 +Compile options: +Overall options: anchored +Duplicate name status changes +First code unit = 'a' +Subject length lower bound = 2 + a1b\=copy=A + 0: a1 + 1: a1 + 2: a1 + C a1 (2) A (non-unique) + a2b\=copy=A + 0: a2b + 1: a2b + 2: + 3: a2 + C a2 (2) A (non-unique) + +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I +Failed: error 143 at offset 38: two named subpatterns have the same name (PCRE2_DUPNAMES not set) + +# In this next test, J is not set at the outer level; consequently it isn't set +# in the pattern's options; consequently pcre2_substring_get_byname() produces +# a random value. 
+ +/^(?Pa) (?J:(?Pb)(?Pc)) (?Pd)/I +Capture group count = 4 +Named capture groups: + A 1 + B 2 + B 3 + C 4 +Compile options: +Overall options: anchored +Duplicate name status changes +First code unit = 'a' +Subject length lower bound = 6 + a bc d\=copy=A,copy=B,copy=C + 0: a bc d + 1: a + 2: b + 3: c + 4: d + C a (1) A (group 1) + C b (1) B (non-unique) + C d (1) C (group 4) + +/^(?Pa)?(?(A)a|b)/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + A 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + aabc + 0: aa + 1: a + bc + 0: b +\= Expect no match + abc +No match + +/(?:(?(ZZ)a|b)(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 2 + bXaX + 0: bXaX + 1: X + +/(?:(?(2y)a|b)(X))+/I +Failed: error 124 at offset 7: missing closing parenthesis for condition + +/(?:(?(ZA)a|b)(?PX))+/I +Failed: error 115 at offset 6: reference to non-existent subpattern + +/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 3 + bbXaaX + 0: bbXaaX + 1: X + +/(?:(?(ZZ)a|\(b\))\\(?PX))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + ZZ 1 +Last code unit = 'X' +Subject length lower bound = 3 + (b)\\Xa\\X + 0: (b)\Xa\X + 1: X + +/(?PX|Y))+/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + A 1 +Subject length lower bound = 2 + bXXaYYaY + 0: bXXaYYaY + 1: Y + bXYaXXaX + 0: bX + 1: X + +/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?PX|Y))+/I +Capture group count = 10 +Max back reference = 10 +Named capture groups: + A 10 +Subject length lower bound = 2 + bXXaYYaY + 0: bXXaYYaY + 1: + 2: + 3: + 4: + 5: + 6: + 7: + 8: + 9: +10: Y + +/\s*,\s*/I +Capture group count = 0 +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 , +Last code unit = ',' +Subject length lower bound = 1 + \x0b,\x0b + 0: 
\x0b,\x0b + \x0c,\x0d + 0: \x0c,\x0d + +/^abc/Im,newline=lf +Capture group count = 0 +Options: multiline +Forced newline is LF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\nabc + 0: abc + xyz\r\nabc + 0: abc +\= Expect no match + xyz\rabc +No match + xyzabc\r +No match + xyzabc\rpqr +No match + xyzabc\r\n +No match + xyzabc\r\npqr +No match + +/^abc/Im,newline=crlf +Capture group count = 0 +Options: multiline +Forced newline is CRLF +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\r\nabclf> + 0: abc +\= Expect no match + xyz\nabclf +No match + xyz\rabclf +No match + +/^abc/Im,newline=cr +Capture group count = 0 +Options: multiline +Forced newline is CR +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + xyz\rabc + 0: abc +\= Expect no match + xyz\nabc +No match + xyz\r\nabc +No match + +/^abc/Im,newline=bad +** Invalid value in "newline=bad" + +/.*/I,newline=lf +Capture group count = 0 +May match empty string +Forced newline is LF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc\x0d + +/.*/I,newline=cr +Capture group count = 0 +May match empty string +Forced newline is CR +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc + abc\r\ndef + 0: abc + +/.*/I,newline=crlf +Capture group count = 0 +May match empty string +Forced newline is CRLF +First code unit at start or follows newline +Subject length lower bound = 0 + abc\ndef + 0: abc\x0adef + abc\rdef + 0: abc\x0ddef + abc\r\ndef + 0: abc + +/\w+(.)(.)?def/Is +Capture group count = 2 +Options: dotall +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = 'f' +Subject length 
lower bound = 5 + abc\ndef + 0: abc\x0adef + 1: \x0a + abc\rdef + 0: abc\x0ddef + 1: \x0d + abc\r\ndef + 0: abc\x0d\x0adef + 1: \x0d + 2: \x0a + +/(?P25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/I +Capture group count = 1 +Named capture groups: + B 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 +Last code unit = '.' +Subject length lower bound = 7 + +/()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + ()()()()()()()()()()()()()()()()()()()() + (.(.))/Ix +Capture group count = 102 +Options: extended +Subject length lower bound = 2 + XY\=ovector=133 + 0: XY + 1: + 2: + 3: + 4: + 5: + 6: + 7: + 8: + 9: +10: +11: +12: +13: +14: +15: +16: +17: +18: +19: +20: +21: +22: +23: +24: +25: +26: +27: +28: +29: +30: +31: +32: +33: +34: +35: +36: +37: +38: +39: +40: +41: +42: +43: +44: +45: +46: +47: +48: +49: +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +70: +71: +72: +73: +74: +75: +76: +77: +78: +79: +80: +81: +82: +83: +84: +85: +86: +87: +88: +89: +90: +91: +92: +93: +94: +95: +96: +97: +98: +99: +100: +101: XY +102: Y + +/(a*b|(?i:c*(?-i)d))/I +Capture group count = 1 +Starting code units: C a b c d +Subject length lower bound = 1 + +/()[ab]xyz/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'z' +Subject length lower bound = 4 + +/(|)[ab]xyz/I +Capture group count = 1 +Starting code units: a b +Last code unit = 'z' +Subject length lower bound = 4 + +/(|c)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c +Last code unit = 'z' +Subject length lower bound = 4 + +/(|c?)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c +Last code unit = 'z' +Subject length lower bound = 4 + +/(d?|c?)[ab]xyz/I +Capture group count = 1 +Starting code units: a b c d +Last code unit = 'z' +Subject length lower bound = 4 + +/(d?|c)[ab]xyz/I +Capture group count = 1 +Starting code 
units: a b c d +Last code unit = 'z' +Subject length lower bound = 4 + +/^a*b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a*+b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a*?b\d/IB +------------------------------------------------------------------ + Bra + ^ + a*+ + b + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +Starting code units: a b +Last code unit = 'b' +Subject length lower bound = 2 + +/^a+A\d/IB +------------------------------------------------------------------ + Bra + ^ + a++ + A + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Last code unit = 'A' +Subject length lower bound = 3 + aaaA5 + 0: aaaA5 +\= Expect no match + aaaa5 +No match + +/^a*A\d/IBi +------------------------------------------------------------------ + Bra + ^ + /i a* + /i A + \d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Compile options: caseless +Overall options: anchored caseless +First code unit = 'A' (caseless) +Subject length lower bound = 2 + aaaA5 + 0: aaaA5 + aaaa5 + 0: aaaa5 + a5 + 0: a5 + +/(a*|b*)[cd]/I +Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a+|b*)[cd]/I 
+Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a*|b+)[cd]/I +Capture group count = 1 +Starting code units: a b c d +Subject length lower bound = 1 + +/(a+|b+)[cd]/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((( + ((( + a + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + )))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) + ))) +/Ix +Capture group count = 203 +Options: extended +First code unit = 'a' +Subject length lower bound = 1 + large nest +Matched, but too many substrings + 0: a + 1: a + 2: a + 3: a + 4: a + 5: a + 6: a + 7: a + 8: a + 9: a +10: a +11: a +12: a +13: a +14: a + +/a*\d/B +------------------------------------------------------------------ + Bra + a*+ + \d + Ket + End +------------------------------------------------------------------ + +/a*\D/B +------------------------------------------------------------------ + Bra + a* + \D + Ket + End +------------------------------------------------------------------ + +/0*\d/B +------------------------------------------------------------------ + Bra + 0* + \d + Ket + End +------------------------------------------------------------------ + +/0*\D/B +------------------------------------------------------------------ + Bra + 0*+ + \D + Ket + End +------------------------------------------------------------------ + +/a*\s/B +------------------------------------------------------------------ + Bra + a*+ + \s + Ket + End +------------------------------------------------------------------ + +/a*\S/B +------------------------------------------------------------------ + Bra + a* + \S + Ket + End 
+------------------------------------------------------------------ + +/ *\s/B +------------------------------------------------------------------ + Bra + * + \s + Ket + End +------------------------------------------------------------------ + +/ *\S/B +------------------------------------------------------------------ + Bra + *+ + \S + Ket + End +------------------------------------------------------------------ + +/a*\w/B +------------------------------------------------------------------ + Bra + a* + \w + Ket + End +------------------------------------------------------------------ + +/a*\W/B +------------------------------------------------------------------ + Bra + a*+ + \W + Ket + End +------------------------------------------------------------------ + +/=*\w/B +------------------------------------------------------------------ + Bra + =*+ + \w + Ket + End +------------------------------------------------------------------ + +/=*\W/B +------------------------------------------------------------------ + Bra + =* + \W + Ket + End +------------------------------------------------------------------ + +/\d*a/B +------------------------------------------------------------------ + Bra + \d*+ + a + Ket + End +------------------------------------------------------------------ + +/\d*2/B +------------------------------------------------------------------ + Bra + \d* + 2 + Ket + End +------------------------------------------------------------------ + +/\d*\d/B +------------------------------------------------------------------ + Bra + \d* + \d + Ket + End +------------------------------------------------------------------ + +/\d*\D/B +------------------------------------------------------------------ + Bra + \d*+ + \D + Ket + End +------------------------------------------------------------------ + +/\d*\s/B +------------------------------------------------------------------ + Bra + \d*+ + \s + Ket + End 
+------------------------------------------------------------------ + +/\d*\S/B +------------------------------------------------------------------ + Bra + \d* + \S + Ket + End +------------------------------------------------------------------ + +/\d*\w/B +------------------------------------------------------------------ + Bra + \d* + \w + Ket + End +------------------------------------------------------------------ + +/\d*\W/B +------------------------------------------------------------------ + Bra + \d*+ + \W + Ket + End +------------------------------------------------------------------ + +/\D*a/B +------------------------------------------------------------------ + Bra + \D* + a + Ket + End +------------------------------------------------------------------ + +/\D*2/B +------------------------------------------------------------------ + Bra + \D*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\D*\d/B +------------------------------------------------------------------ + Bra + \D*+ + \d + Ket + End +------------------------------------------------------------------ + +/\D*\D/B +------------------------------------------------------------------ + Bra + \D* + \D + Ket + End +------------------------------------------------------------------ + +/\D*\s/B +------------------------------------------------------------------ + Bra + \D* + \s + Ket + End +------------------------------------------------------------------ + +/\D*\S/B +------------------------------------------------------------------ + Bra + \D* + \S + Ket + End +------------------------------------------------------------------ + +/\D*\w/B +------------------------------------------------------------------ + Bra + \D* + \w + Ket + End +------------------------------------------------------------------ + +/\D*\W/B +------------------------------------------------------------------ + Bra + \D* + \W + Ket + End 
+------------------------------------------------------------------ + +/\s*a/B +------------------------------------------------------------------ + Bra + \s*+ + a + Ket + End +------------------------------------------------------------------ + +/\s*2/B +------------------------------------------------------------------ + Bra + \s*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\s*\d/B +------------------------------------------------------------------ + Bra + \s*+ + \d + Ket + End +------------------------------------------------------------------ + +/\s*\D/B +------------------------------------------------------------------ + Bra + \s* + \D + Ket + End +------------------------------------------------------------------ + +/\s*\s/B +------------------------------------------------------------------ + Bra + \s* + \s + Ket + End +------------------------------------------------------------------ + +/\s*\S/B +------------------------------------------------------------------ + Bra + \s*+ + \S + Ket + End +------------------------------------------------------------------ + +/\s*\w/B +------------------------------------------------------------------ + Bra + \s*+ + \w + Ket + End +------------------------------------------------------------------ + +/\s*\W/B +------------------------------------------------------------------ + Bra + \s* + \W + Ket + End +------------------------------------------------------------------ + +/\S*a/B +------------------------------------------------------------------ + Bra + \S* + a + Ket + End +------------------------------------------------------------------ + +/\S*2/B +------------------------------------------------------------------ + Bra + \S* + 2 + Ket + End +------------------------------------------------------------------ + +/\S*\d/B +------------------------------------------------------------------ + Bra + \S* + \d + Ket + End 
+------------------------------------------------------------------ + +/\S*\D/B +------------------------------------------------------------------ + Bra + \S* + \D + Ket + End +------------------------------------------------------------------ + +/\S*\s/B +------------------------------------------------------------------ + Bra + \S*+ + \s + Ket + End +------------------------------------------------------------------ + +/\S*\S/B +------------------------------------------------------------------ + Bra + \S* + \S + Ket + End +------------------------------------------------------------------ + +/\S*\w/B +------------------------------------------------------------------ + Bra + \S* + \w + Ket + End +------------------------------------------------------------------ + +/\S*\W/B +------------------------------------------------------------------ + Bra + \S* + \W + Ket + End +------------------------------------------------------------------ + +/\w*a/B +------------------------------------------------------------------ + Bra + \w* + a + Ket + End +------------------------------------------------------------------ + +/\w*2/B +------------------------------------------------------------------ + Bra + \w* + 2 + Ket + End +------------------------------------------------------------------ + +/\w*\d/B +------------------------------------------------------------------ + Bra + \w* + \d + Ket + End +------------------------------------------------------------------ + +/\w*\D/B +------------------------------------------------------------------ + Bra + \w* + \D + Ket + End +------------------------------------------------------------------ + +/\w*\s/B +------------------------------------------------------------------ + Bra + \w*+ + \s + Ket + End +------------------------------------------------------------------ + +/\w*\S/B +------------------------------------------------------------------ + Bra + \w* + \S + Ket + End 
+------------------------------------------------------------------ + +/\w*\w/B +------------------------------------------------------------------ + Bra + \w* + \w + Ket + End +------------------------------------------------------------------ + +/\w*\W/B +------------------------------------------------------------------ + Bra + \w*+ + \W + Ket + End +------------------------------------------------------------------ + +/\W*a/B +------------------------------------------------------------------ + Bra + \W*+ + a + Ket + End +------------------------------------------------------------------ + +/\W*2/B +------------------------------------------------------------------ + Bra + \W*+ + 2 + Ket + End +------------------------------------------------------------------ + +/\W*\d/B +------------------------------------------------------------------ + Bra + \W*+ + \d + Ket + End +------------------------------------------------------------------ + +/\W*\D/B +------------------------------------------------------------------ + Bra + \W* + \D + Ket + End +------------------------------------------------------------------ + +/\W*\s/B +------------------------------------------------------------------ + Bra + \W* + \s + Ket + End +------------------------------------------------------------------ + +/\W*\S/B +------------------------------------------------------------------ + Bra + \W* + \S + Ket + End +------------------------------------------------------------------ + +/\W*\w/B +------------------------------------------------------------------ + Bra + \W*+ + \w + Ket + End +------------------------------------------------------------------ + +/\W*\W/B +------------------------------------------------------------------ + Bra + \W* + \W + Ket + End +------------------------------------------------------------------ + +/[^a]+a/B +------------------------------------------------------------------ + Bra + [^a]++ (not) + a + Ket + End 
+------------------------------------------------------------------ + +/[^a]+a/Bi +------------------------------------------------------------------ + Bra + /i [^a]++ (not) + /i a + Ket + End +------------------------------------------------------------------ + +/[^a]+A/Bi +------------------------------------------------------------------ + Bra + /i [^a]++ (not) + /i A + Ket + End +------------------------------------------------------------------ + +/[^a]+b/B +------------------------------------------------------------------ + Bra + [^a]+ (not) + b + Ket + End +------------------------------------------------------------------ + +/[^a]+\d/B +------------------------------------------------------------------ + Bra + [^a]+ (not) + \d + Ket + End +------------------------------------------------------------------ + +/a*[^a]/B +------------------------------------------------------------------ + Bra + a*+ + [^a] (not) + Ket + End +------------------------------------------------------------------ + +/(?Px)(?Py)/I +Capture group count = 2 +Named capture groups: + abc 1 + xyz 2 +First code unit = 'x' +Last code unit = 'y' +Subject length lower bound = 2 + xy\=copy=abc,copy=xyz + 0: xy + 1: x + 2: y + C x (1) abc (group 1) + C y (1) xyz (group 2) + +/(?x)(?'xyz'y)/I +Capture group count = 2 +Named capture groups: + abc 1 + xyz 2 +First code unit = 'x' +Last code unit = 'y' +Subject length lower bound = 2 + xy\=copy=abc,copy=xyz + 0: xy + 1: x + 2: y + C x (1) abc (group 1) + C y (1) xyz (group 2) + +/(?x)(?'xyz>y)/I +Failed: error 142 at offset 15: syntax error in subpattern name (missing terminator?) 
+ +/(?P'abc'x)(?Py)/I +Failed: error 141 at offset 3: unrecognized character after (?P + +/^(?:(?(ZZ)a|b)(?X))+/ + bXaX + 0: bXaX + 1: X + bXbX + 0: bX + 1: X +\= Expect no match + aXaX +No match + aXbX +No match + +/^(?P>abc)(?xxx)/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/^(?P>abc)(?x|y)/ + xx + 0: xx + 1: x + xy + 0: xy + 1: y + yy + 0: yy + 1: y + yx + 0: yx + 1: x + +/^(?P>abc)(?Px|y)/ + xx + 0: xx + 1: x + xy + 0: xy + 1: y + yy + 0: yy + 1: y + yx + 0: yx + 1: x + +/^((?(abc)a|b)(?x|y))+/ + bxay + 0: bxay + 1: ay + 2: y + bxby + 0: bx + 1: bx + 2: x +\= Expect no match + axby +No match + +/^(((?P=abc)|X)(?x|y))+/ + XxXxxx + 0: XxXxxx + 1: xx + 2: x + 3: x + XxXyyx + 0: XxXyyx + 1: yx + 2: y + 3: x + XxXyxx + 0: XxXy + 1: Xy + 2: X + 3: y +\= Expect no match + x +No match + +/^(?1)(abc)/ + abcabc + 0: abcabc + 1: abc + +/^(?:(?:\1|X)(a|b))+/ + Xaaa + 0: Xaaa + 1: a + Xaba + 0: Xa + 1: a + +/^[\E\Qa\E-\Qz\E]+/B +------------------------------------------------------------------ + Bra + ^ + [a-z]++ + Ket + End +------------------------------------------------------------------ + +/^[a\Q]bc\E]/B +------------------------------------------------------------------ + Bra + ^ + [\]a-c] + Ket + End +------------------------------------------------------------------ + +/^[a-\Q\E]/B +------------------------------------------------------------------ + Bra + ^ + [\-a] + Ket + End +------------------------------------------------------------------ + +/^(?P>abc)[()](?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + [()] + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^((?(abc)y)[()](?Px))+/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + Cond + 2 Capture ref + y + Ket + [()] + CBra 2 + x + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + (xy)x + 0: (xy)x + 1: 
y)x + 2: x + +/^(?P>abc)\Q()\E(?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + () + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?P>abc)[a\Q(]\E(](?)/B +------------------------------------------------------------------ + Bra + ^ + Recurse + [(\]a] + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?P>abc) # this is (a comment) + (?)/Bx +------------------------------------------------------------------ + Bra + ^ + Recurse + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/^\W*(?:(?(?.)\W*(?&one)\W*\k|)|(?(?.)\W*(?&three)\W*\k'four'|\W*.\W*))\W*$/Ii +Capture group count = 4 +Max back reference = 4 +Named capture groups: + four 4 + one 1 + three 3 + two 2 +May match empty string +Compile options: caseless +Overall options: anchored caseless +Subject length lower bound = 0 + 1221 + 0: 1221 + 1: 1221 + 2: 1 + Satan, oscillate my metallic sonatas! + 0: Satan, oscillate my metallic sonatas! + 1: + 2: + 3: Satan, oscillate my metallic sonatas + 4: S + A man, a plan, a canal: Panama! + 0: A man, a plan, a canal: Panama! + 1: + 2: + 3: A man, a plan, a canal: Panama + 4: A + Able was I ere I saw Elba. + 0: Able was I ere I saw Elba. 
+ 1: + 2: + 3: Able was I ere I saw Elba + 4: A +\= Expect no match + The quick brown fox +No match + +/(?=(\w+))\1:/I +Capture group count = 1 +Max back reference = 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = ':' +Subject length lower bound = 2 + abcd: + 0: abcd: + 1: abcd + +/(?=(?'abc'\w+))\k:/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + abc 1 +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z +Last code unit = ':' +Subject length lower bound = 2 + abcd: + 0: abcd: + 1: abcd + +/(?'abc'a|b)(?d|e)\k{2}/dupnames + adaa + 0: adaa + 1: a + 2: d +\= Expect no match + addd +No match + adbb +No match + +/(?'abc'a|b)(?d|e)(?&abc){2}/dupnames + bdaa + 0: bdaa + 1: b + 2: d + bdab + 0: bdab + 1: b + 2: d +\= Expect no match + bddd +No match + +/(?( (?'B' abc (?(R) (?(R&A)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x + abcabc1Xabc2XabcXabcabc + 0: abcabc1Xabc2XabcX + 1: abcabc1Xabc2XabcX + 2: abcabc1Xabc2XabcX + +/(? 
(?'B' abc (?(R) (?(R&C)1) (?(R&B)2) X | (?1) (?2) (?R) ))) /x +Failed: error 115 at offset 27: reference to non-existent subpattern + +/^(?(DEFINE) abc | xyz ) /x +Failed: error 154 at offset 4: DEFINE subpattern contains more than one branch + +/(?(DEFINE) abc) xyz/Ix +Capture group count = 0 +Options: extended +First code unit = 'x' +Last code unit = 'z' +Subject length lower bound = 3 + +/(a|)*\d/ + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4\=ovector=0 + 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4 + 1: +\= Expect no match + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\=ovector=0 +No match + +/^a.b/newline=lf + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/^a.b/newline=cr + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/^a.b/newline=anycrlf + a\x85b + 0: a\x85b +\= Expect no match + a\rb +No match + +/^a.b/newline=any +\= Expect no match + a\nb +No match + a\rb +No match + a\x85b +No match + +/^abc./gmx,newline=any + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 JUNK + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc7 + +/abc.$/gmx,newline=any + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7 abc9 + 0: abc1 + 0: abc2 + 0: abc3 + 0: abc4 + 0: abc5 + 0: abc6 + 0: abc9 + +/^a\Rb/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b +\= Expect no match + a\n\rb +No match + +/^a\R*b/bsr=unicode + ab + 0: ab + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb + +/^a\R+b/bsr=unicode + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + a\r\nb + 0: a\x0d\x0ab + a\x0bb + 0: a\x0bb + a\x0cb + 0: a\x0cb + a\x85b + 0: a\x85b + a\n\rb + 0: a\x0a\x0db + a\n\r\x85\x0cb + 0: a\x0a\x0d\x85\x0cb +\= Expect no match + ab +No match + 
+/^a\R{1,3}b/bsr=unicode + a\nb + 0: a\x0ab + a\n\rb + 0: a\x0a\x0db + a\n\r\x85b + 0: a\x0a\x0d\x85b + a\r\n\r\nb + 0: a\x0d\x0a\x0d\x0ab + a\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0ab + a\n\r\n\rb + 0: a\x0a\x0d\x0a\x0db + a\n\n\r\nb + 0: a\x0a\x0a\x0d\x0ab +\= Expect no match + a\n\n\n\rb +No match + a\r +No match + +/(?&abc)X(?P)/I +Capture group count = 1 +Named capture groups: + abc 1 +Last code unit = 'P' +Subject length lower bound = 3 + abcPXP123 + 0: PXP + 1: P + +/(?1)X(?P)/I +Capture group count = 1 +Named capture groups: + abc 1 +Last code unit = 'P' +Subject length lower bound = 3 + abcPXP123 + 0: PXP + 1: P + +/(?:a(?&abc)b)*(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + 123axbaxbaxb456 + 0: x + 1: x + +/(?:a(?&abc)b){1,5}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(?:a(?&abc)b){2,5}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(?:a(?&abc)b){2,}(?x)/ + 123axbaxbaxbx456 + 0: axbaxbaxbx + 1: x + +/(abc)(?i:(?1))/ + defabcabcxyz + 0: abcabc + 1: abc +\= Expect no match + DEFabcABCXYZ +No match + +/(abc)(?:(?i)(?1))/ + defabcabcxyz + 0: abcabc + 1: abc +\= Expect no match + DEFabcABCXYZ +No match + +/^(a)\g-2/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/^(a)\g/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(a)\g{0}/ +Failed: error 115 at offset 9: reference to non-existent subpattern + +/^(a)\g{3/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(a)\g{aa}/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/^a.b/newline=lf + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/.+foo/ + afoo + 0: afoo +\= Expect no match + \r\nfoo +No match + \nfoo +No match + +/.+foo/newline=crlf + afoo + 0: afoo + \nfoo + 0: \x0afoo +\= Expect no match + \r\nfoo +No match + +/.+foo/newline=any + afoo + 0: afoo +\= Expect no match + \nfoo 
+No match + \r\nfoo +No match + +/.+foo/s + afoo + 0: afoo + \r\nfoo + 0: \x0d\x0afoo + \nfoo + 0: \x0afoo + +/^$/gm,newline=any + abc\r\rxyz + 0: + abc\n\rxyz + 0: +\= Expect no match + abc\r\nxyz +No match + +/(?m)^$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a + +/(?m)^$|^\r\n/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a + 0: \x0d\x0a + 0+ + +/(?m)$/g,newline=any,aftertext + abc\r\n\r\n + 0: + 0+ \x0d\x0a\x0d\x0a + 0: + 0+ \x0d\x0a + 0: + 0+ + +/abc.$/gmx,newline=anycrlf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc9 + 0: abc1 + 0: abc4 + 0: abc5 + 0: abc9 + +/^X/m + XABC + 0: X +\= Expect no match + XABC\=notbol +No match + +/(ab|c)(?-1)/B +------------------------------------------------------------------ + Bra + CBra 1 + ab + Alt + c + Ket + Recurse + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: ab + +/xy(?+1)(abc)/B +------------------------------------------------------------------ + Bra + xy + Recurse + CBra 1 + abc + Ket + Ket + End +------------------------------------------------------------------ + xyabcabc + 0: xyabcabc + 1: abc +\= Expect no match + xyabc +No match + +/x(?-0)y/ +Failed: error 126 at offset 5: a relative value of zero is not allowed + +/x(?-1)y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/x(?+0)y/ +Failed: error 126 at offset 5: a relative value of zero is not allowed + +/x(?+1)y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/^(abc)?(?(-1)X|Y)/B +------------------------------------------------------------------ + Bra + ^ + Brazero + CBra 1 + abc + Ket + Cond + 1 Capture ref + X + Alt + Y + Ket + Ket + End +------------------------------------------------------------------ + abcX + 0: abcX + 1: abc + Y + 0: Y +\= Expect no match + abcY +No match + +/^((?(+1)X|Y)(abc))+/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + Cond + 2 Capture ref + X + 
Alt + Y + Ket + CBra 2 + abc + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + YabcXabc + 0: YabcXabc + 1: Xabc + 2: abc + YabcXabcXabc + 0: YabcXabcXabc + 1: Xabc + 2: abc +\= Expect no match + XabcXabc +No match + +/(?(-1)a)/B +Failed: error 115 at offset 5: reference to non-existent subpattern + +/((?(-1)a))/B +------------------------------------------------------------------ + Bra + CBra 1 + Cond + 1 Capture ref + a + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/((?(-2)a))/B +Failed: error 115 at offset 6: reference to non-existent subpattern + +/^(?(+1)X|Y)(.)/B +------------------------------------------------------------------ + Bra + ^ + Cond + 1 Capture ref + X + Alt + Y + Ket + CBra 1 + Any + Ket + Ket + End +------------------------------------------------------------------ + Y! + 0: Y! + 1: ! + +/(?tom|bon)-\k{A}/ + tom-tom + 0: tom-tom + 1: tom + bon-bon + 0: bon-bon + 1: bon +\= Expect no match + tom-bon +No match + +/\g{A/ +Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) 
+ +/(?|(abc)|(xyz))/B +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 1 + xyz + Ket + Ket + Ket + End +------------------------------------------------------------------ + >abc< + 0: abc + 1: abc + >xyz< + 0: xyz + 1: xyz + +/(x)(?|(abc)|(xyz))(x)/B +------------------------------------------------------------------ + Bra + CBra 1 + x + Ket + Bra + CBra 2 + abc + Ket + Alt + CBra 2 + xyz + Ket + Ket + CBra 3 + x + Ket + Ket + End +------------------------------------------------------------------ + xabcx + 0: xabcx + 1: x + 2: abc + 3: x + xxyzx + 0: xxyzx + 1: x + 2: xyz + 3: x + +/(x)(?|(abc)(pqr)|(xyz))(x)/B +------------------------------------------------------------------ + Bra + CBra 1 + x + Ket + Bra + CBra 2 + abc + Ket + CBra 3 + pqr + Ket + Alt + CBra 2 + xyz + Ket + Ket + CBra 4 + x + Ket + Ket + End +------------------------------------------------------------------ + xabcpqrx + 0: xabcpqrx + 1: x + 2: abc + 3: pqr + 4: x + xxyzx + 0: xxyzx + 1: x + 2: xyz + 3: + 4: x + +/\H++X/B +------------------------------------------------------------------ + Bra + \H++ + X + Ket + End +------------------------------------------------------------------ +\= Expect no match + XXXX +No match + +/\H+\hY/B +------------------------------------------------------------------ + Bra + \H++ + \h + Y + Ket + End +------------------------------------------------------------------ + XXXX Y + 0: XXXX Y + +/\H+ Y/B +------------------------------------------------------------------ + Bra + \H++ + Y + Ket + End +------------------------------------------------------------------ + +/\h+A/B +------------------------------------------------------------------ + Bra + \h++ + A + Ket + End +------------------------------------------------------------------ + +/\v*B/B +------------------------------------------------------------------ + Bra + \v*+ + B + Ket + End 
+------------------------------------------------------------------ + +/\V+\x0a/B +------------------------------------------------------------------ + Bra + \V++ + \x0a + Ket + End +------------------------------------------------------------------ + +/A+\h/B +------------------------------------------------------------------ + Bra + A++ + \h + Ket + End +------------------------------------------------------------------ + +/ *\H/B +------------------------------------------------------------------ + Bra + *+ + \H + Ket + End +------------------------------------------------------------------ + +/A*\v/B +------------------------------------------------------------------ + Bra + A*+ + \v + Ket + End +------------------------------------------------------------------ + +/\x0b*\V/B +------------------------------------------------------------------ + Bra + \x0b*+ + \V + Ket + End +------------------------------------------------------------------ + +/\d+\h/B +------------------------------------------------------------------ + Bra + \d++ + \h + Ket + End +------------------------------------------------------------------ + +/\d*\v/B +------------------------------------------------------------------ + Bra + \d*+ + \v + Ket + End +------------------------------------------------------------------ + +/S+\h\S+\v/B +------------------------------------------------------------------ + Bra + S++ + \h + \S++ + \v + Ket + End +------------------------------------------------------------------ + +/\w{3,}\h\w+\v/B +------------------------------------------------------------------ + Bra + \w{3} + \w*+ + \h + \w++ + \v + Ket + End +------------------------------------------------------------------ + +/\h+\d\h+\w\h+\S\h+\H/B +------------------------------------------------------------------ + Bra + \h++ + \d + \h++ + \w + \h++ + \S + \h++ + \H + Ket + End +------------------------------------------------------------------ + +/\v+\d\v+\w\v+\S\v+\V/B 
+------------------------------------------------------------------ + Bra + \v++ + \d + \v++ + \w + \v++ + \S + \v++ + \V + Ket + End +------------------------------------------------------------------ + +/\H+\h\H+\d/B +------------------------------------------------------------------ + Bra + \H++ + \h + \H+ + \d + Ket + End +------------------------------------------------------------------ + +/\V+\v\V+\w/B +------------------------------------------------------------------ + Bra + \V++ + \v + \V+ + \w + Ket + End +------------------------------------------------------------------ + +/\( (?: [^()]* | (?R) )* \)/x +(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0
(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)
0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)\=jitstack=1024 + 0: (0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(
0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0
)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0) + +/[\E]AAA/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\E]AAA/ +Failed: error 106 at offset 9: missing terminating ] for character class + +/[^\E]AAA/ +Failed: error 106 at offset 8: missing terminating ] for character class + +/[^\Q\E]AAA/ +Failed: error 106 at offset 10: missing terminating ] for character class + +/[\E^]AAA/ +Failed: error 106 at offset 8: missing terminating ] for character class + +/[\Q\E^]AAA/ +Failed: error 106 at offset 10: missing terminating ] for character class + +/A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/B +------------------------------------------------------------------ + Bra + A + *PRUNE + B + *SKIP + C + *THEN + D + *COMMIT + E + *FAIL + F + *FAIL + G + *FAIL + H + *ACCEPT + I + Ket + End +------------------------------------------------------------------ + +/^a+(*FAIL)/auto_callout +\= Expect no match + aaaaaa +--->aaaaaa + +0 ^ ^ + +1 ^ a+ + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^ ^ (*FAIL) + +3 ^^ (*FAIL) +No match + +/a+b?c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? 
+ +4 ^ ^ c+ + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) + +6 ^ ^ (*FAIL) +No match + +/a+b?(*PRUNE)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? + +4 ^ ^ (*PRUNE) ++12 ^ ^ c+ ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) ++14 ^ ^ (*FAIL) +No match + +/a+b?(*COMMIT)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*COMMIT) ++13 ^ ^ c+ ++15 ^ ^ (*FAIL) ++15 ^ ^ (*FAIL) ++15 ^ ^ (*FAIL) +No match + +/a+b?(*SKIP)c+(*FAIL)/auto_callout +\= Expect no match + aaabcccaaabccc +--->aaabcccaaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*SKIP) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*SKIP) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) +No match + +/a+b?(*THEN)c+(*FAIL)/auto_callout +\= Expect no match + aaabccc +--->aaabccc + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^ ^ b? + +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) + +0 ^ a+ + +2 ^^ b? 
+ +4 ^ ^ (*THEN) ++11 ^ ^ c+ ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) ++13 ^ ^ (*FAIL) +No match + +/a(*MARK)b/ +Failed: error 166 at offset 7: (*MARK) must have an argument + +/\g6666666666/ +Failed: error 161 at offset 12: subpattern number is too big + +/[\g6666666666]/B +------------------------------------------------------------------ + Bra + [6g] + Ket + End +------------------------------------------------------------------ + +/(?1)\c[/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/.+A/newline=crlf +\= Expect no match + \r\nA +No match + +/\nA/newline=crlf + \r\nA + 0: \x0aA + +/[\r\n]A/newline=crlf + \r\nA + 0: \x0aA + +/(\r|\n)A/newline=crlf + \r\nA + 0: \x0aA + 1: \x0a + +/a(*CR)b/ +Failed: error 160 at offset 5: (*VERB) not recognized or malformed + +/(*CR)a.b/ + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/(*CR)a.b/newline=lf + a\nb + 0: a\x0ab +\= Expect no match + a\rb +No match + +/(*LF)a.b/newline=CRLF + a\rb + 0: a\x0db +\= Expect no match + a\nb +No match + +/(*CRLF)a.b/ + a\rb + 0: a\x0db + a\nb + 0: a\x0ab +\= Expect no match + a\r\nb +No match + +/(*ANYCRLF)a.b/newline=CR +\= Expect no match + a\rb +No match + a\nb +No match + a\r\nb +No match + +/(*ANY)a.b/newline=cr +\= Expect no match + a\rb +No match + a\nb +No match + a\r\nb +No match + a\x85b +No match + +/(*ANY).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*ANYCRLF).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*CRLF).*/g + abc\r\ndef + 0: abc + 0: + 0: def + 0: + +/(*NUL)^.*/ + a\nb\x00ccc + 0: a\x0ab + +/(*NUL)^.*/s + a\nb\x00ccc + 0: a\x0ab\x00ccc + +/^x/m,newline=NUL + ab\x00xy + 0: x + +/'#comment' 0d 0a 00 '^x\' 0a 'y'/x,newline=nul,hex + x\nyz + 0: x\x0ay + +/(*NUL)^X\NY/ + X\nY + 0: X\x0aY + X\rY + 0: X\x0dY +\= Expect no match + X\x00Y +No match + +/a\Rb/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + 
a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\Rb/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R?b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab +\= Expect no match + a\x85b +No match + a\x0bb +No match + +/a\R?b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + a\rb + 0: a\x0db + a\nb + 0: a\x0ab + a\r\nb + 0: a\x0d\x0ab + a\x85b + 0: a\x85b + a\x0bb + 0: a\x0bb + +/a\R{2,4}b/I,bsr=anycrlf +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\n\nb + 0: a\x0d\x0a\x0ab + a\n\r\rb + 0: a\x0a\x0d\x0db + a\r\n\r\n\r\n\r\nb + 0: a\x0d\x0a\x0d\x0a\x0d\x0a\x0d\x0ab +\= Expect no match + a\x85\x85b +No match + a\x0b\x0bb +No match + +/a\R{2,4}b/I,bsr=unicode +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 4 + a\r\rb + 0: a\x0d\x0db + a\n\n\nb + 0: a\x0a\x0a\x0ab + a\r\n\n\r\rb + 0: a\x0d\x0a\x0a\x0d\x0db + a\x85\x85b + 0: a\x85\x85b + a\x0b\x0bb + 0: a\x0b\x0bb +\= Expect no match + a\r\r\r\r\rb +No match + +/(*BSR_ANYCRLF)a\Rb/I +Capture group count = 0 +\R matches CR, LF, or CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + +/(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +\R matches any Unicode newline +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\x85b + 0: a\x85b + +/(*BSR_ANYCRLF)(*CRLF)a\Rb/I 
+Capture group count = 0 +\R matches CR, LF, or CRLF +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\nb + 0: a\x0ab + a\rb + 0: a\x0db + +/(*CRLF)(*BSR_UNICODE)a\Rb/I +Capture group count = 0 +\R matches any Unicode newline +Forced newline is CRLF +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 3 + a\x85b + 0: a\x85b + +/(*CRLF)(*BSR_ANYCRLF)(*CR)ab/I +Capture group count = 0 +\R matches CR, LF, or CRLF +Forced newline is CR +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + +/(?)(?&)/ +Failed: error 162 at offset 9: subpattern name expected + +/(?)(?&a)/ +Failed: error 115 at offset 11: reference to non-existent subpattern + +/(?)(?&aaaaaaaaaaaaaaaaaaaaaaa)/ +Failed: error 115 at offset 9: reference to non-existent subpattern + +/(?+-a)/ +Failed: error 129 at offset 2: digit expected after (?+ or (?- + +/(?-+a)/ +Failed: error 111 at offset 3: unrecognized character after (? or (?- + +/(?(-1))/ +Failed: error 115 at offset 5: reference to non-existent subpattern + +/(?(+10))/ +Failed: error 115 at offset 4: reference to non-existent subpattern + +/(?(10))/ +Failed: error 115 at offset 3: reference to non-existent subpattern + +/(?(+2))()()/ + +/(?(2))()()/ + +/\k''/ +Failed: error 162 at offset 3: subpattern name expected + +/\k<>/ +Failed: error 162 at offset 3: subpattern name expected + +/\k{}/ +Failed: error 162 at offset 3: subpattern name expected + +/\k/ +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name + +/\kabc/ +Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name + +/(?P=)/ +Failed: error 162 at offset 4: subpattern name expected + +/(?P>)/ +Failed: error 162 at offset 4: subpattern name expected + +/[[:foo:]]/ +Failed: error 130 at offset 8: unknown POSIX class name + +/[[:1234:]]/ +Failed: error 130 at offset 9: unknown POSIX class name + +/[[:f\oo:]]/ 
+Failed: error 130 at offset 9: unknown POSIX class name + +/[[: :]]/ +Failed: error 130 at offset 6: unknown POSIX class name + +/[[:...:]]/ +Failed: error 130 at offset 8: unknown POSIX class name + +/[[:l\ower:]]/ +Failed: error 130 at offset 11: unknown POSIX class name + +/[[:abc\:]]/ +Failed: error 130 at offset 9: unknown POSIX class name + +/[abc[:x\]pqr:]]/ +Failed: error 130 at offset 14: unknown POSIX class name + +/[[:a\dz:]]/ +Failed: error 130 at offset 9: unknown POSIX class name + +/(^(a|b\g<-1'c))/ +Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/^(?+1)(?x|y){0}z/ + xzxx + 0: xz + yzyy + 0: yz +\= Expect no match + xxz +No match + +/(\3)(\1)(a)/ +\= Expect no match + cat +No match + +/cat[]/B,allow_empty_class +------------------------------------------------------------------ + Bra + cat + [] + Ket + End +------------------------------------------------------------------ + cat\=ph +Partial match: cat + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + 0: a + 1: + 2: + 3: a + +/TA]/ + The ACTA] comes + 0: TA] + +/TA]/allow_empty_class,match_unset_backref,dupnames + The ACTA] comes + 0: TA] + +/(?2)[]a()b](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcbabc + +/(?2)[^]a()b](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcbabc + +/(?1)[]a()b](abc)/ + abcbabc + 0: abcbabc + 1: abc +\= Expect no match + abcXabc +No match + +/(?1)[^]a()b](abc)/ + abcXabc + 0: abcXabc + 1: abc +\= Expect no match + abcbabc +No match + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + 0: xyzbabcxyz + 1: abc + 2: xyz + +/(?&N)[]a(?)](?abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abc)](abc)/ +Failed: error 115 at offset 3: reference to non-existent subpattern + abcadc + +0 ^ (? 
+ +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +7 ^ b ++11 ^ ^ ++12 ^ ) ++13 ^ End of pattern + 0: + abc +--->abc + +0 ^ (? + +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +8 ^ ^ ) + +9 ^ b + +0 ^ (? + +2 ^ (?= + +5 ^ .* + +7 ^ ^ b + +7 ^^ b + +7 ^ b + +8 ^^ ) + +9 ^ b ++10 ^^ | ++13 ^^ End of pattern + 0: b + +/(?(?=b).*b|^d)/I +Capture group count = 0 +Subject length lower bound = 1 + +/(?(?=.*b).*b|^d)/I +Capture group count = 0 +Subject length lower bound = 1 + +/xyz/auto_callout + xyz +--->xyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz + abcxyz +--->abcxyz + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +No match + abcxypqr +No match + +/xyz/auto_callout,no_start_optimize + abcxyz +--->abcxyz + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz +\= Expect no match + abc +--->abc + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + abcxypqr +--->abcxypqr + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x +No match + +/(*NO_START_OPT)xyz/auto_callout + abcxyz +--->abcxyz ++15 ^ x ++15 ^ x ++15 ^ x ++15 ^ x ++16 ^^ y ++17 ^ ^ z ++18 ^ ^ End of pattern + 0: xyz + +/(*NO_AUTO_POSSESS)a+b/B +------------------------------------------------------------------ + Bra + a+ + b + Ket + End +------------------------------------------------------------------ + +/xyz/auto_callout,no_start_optimize + abcxyz +--->abcxyz + +0 ^ x + +0 ^ x + +0 ^ x + +0 ^ x + +1 ^^ y + +2 ^ ^ z + +3 ^ ^ End of pattern + 0: xyz + +/^"((?(?=[a])[^"])|b)*"$/auto_callout + "ab" +--->"ab" + +0 ^ ^ + +1 ^ " + +2 ^^ ( + +3 ^^ (? + +5 ^^ (?= + +8 ^^ [a] ++11 ^ ^ ) ++12 ^^ [^"] ++16 ^ ^ ) ++17 ^ ^ | + +3 ^ ^ (? + +5 ^ ^ (?= + +8 ^ ^ [a] ++17 ^ ^ | ++21 ^ ^ " ++18 ^ ^ b ++19 ^ ^ )* + +3 ^ ^ (? 
+ +5 ^ ^ (?= + +8 ^ ^ [a] ++17 ^ ^ | ++21 ^ ^ " ++22 ^ ^ $ ++23 ^ ^ End of pattern + 0: "ab" + 1: + +/^"((?(?=[a])[^"])|b)*"$/ + "ab" + 0: "ab" + 1: + +/^X(?5)(a)(?|(b)|(q))(c)(d)Y/ +Failed: error 115 at offset 5: reference to non-existent subpattern + XYabcdY + +/^X(?&N)(a)(?|(b)|(q))(c)(d)(?Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: c + 4: d + 5: Y + +/Xa{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/Xa{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\d{2,4}b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}?b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\d{2,4}+b/ + X\=ps +Partial match: X + X3\=ps +Partial match: X3 + X33\=ps +Partial match: X33 + X333\=ps +Partial match: X333 + X3333\=ps +Partial match: X3333 + +/X\D{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X\D{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial 
match: Xaaaa + +/X[abc]{2,4}?b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[abc]{2,4}+b/ + X\=ps +Partial match: X + Xa\=ps +Partial match: Xa + Xaa\=ps +Partial match: Xaa + Xaaa\=ps +Partial match: Xaaa + Xaaaa\=ps +Partial match: Xaaaa + +/X[^a]{2,4}b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}?b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/X[^a]{2,4}+b/ + X\=ps +Partial match: X + Xz\=ps +Partial match: Xz + Xzz\=ps +Partial match: Xzz + Xzzz\=ps +Partial match: Xzzz + Xzzzz\=ps +Partial match: Xzzzz + +/(Y)X\1{2,4}b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(Y)X\1{2,4}?b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/(Y)X\1{2,4}+b/ + YX\=ps +Partial match: YX + YXY\=ps +Partial match: YXY + YXYY\=ps +Partial match: YXYY + YXYYY\=ps +Partial match: YXYYY + YXYYYY\=ps +Partial match: YXYYYY + +/\++\KZ|\d+X|9+Y/startchar + ++++123999\=ps +Partial match: 123999 + ++++123999Y\=ps + 0: 999Y + ++++Z1234\=ps + 0: ++++Z + ^^^^ + +/Z(*F)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/Z(?!)/ +\= Expect no match + Z\=ps +No match + ZA\=ps +No match + +/dog(sbody)?/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/dog(sbody)??/ + dogs\=ps + 0: dog + dogs\=ph + 0: dog + +/dog|dogsbody/ + dogs\=ps + 0: dog + dogs\=ph + 0: dog + +/dogsbody|dog/ + dogs\=ps + 0: dog + dogs\=ph +Partial match: dogs + +/\bthe cat\b/ + the cat\=ps + 0: the cat + the cat\=ph +Partial match: the cat + 
+/abc/ + abc\=ps + 0: abc + abc\=ph + 0: abc + +/abc\K123/startchar + xyzabc123pqr + 0: abc123 + ^^^ + xyzabc12\=ps +Partial match: abc12 + xyzabc12\=ph +Partial match: abc12 + +/(?<=abc)123/ + xyzabc123pqr + 0: 123 + xyzabc12\=ps +Partial match: 12 + xyzabc12\=ph +Partial match: 12 + +/\babc\b/ + +++abc+++ + 0: abc + +++ab\=ps +Partial match: ab + +++ab\=ph +Partial match: ab + +/(?&word)(?&element)(?(DEFINE)(?<[^m][^>]>[^<])(?\w*+))/B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Cond + Cond false + CBra 1 + < + [^m] (not) + [^>] (not) + > + [^<] (not) + Ket + CBra 2 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/(?&word)(?&element)(?(DEFINE)(?<[^\d][^>]>[^<])(?\w*+))/B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Cond + Cond false + CBra 1 + < + [^0-9] + [^>] (not) + > + [^<] (not) + Ket + CBra 2 + \w*+ + Ket + Ket + Ket + End +------------------------------------------------------------------ + +/(ab)(x(y)z(cd(*ACCEPT)))pq/B +------------------------------------------------------------------ + Bra + CBra 1 + ab + Ket + CBra 2 + x + CBra 3 + y + Ket + z + CBra 4 + cd + Close 4 + Close 2 + *ACCEPT + Ket + Ket + pq + Ket + End +------------------------------------------------------------------ + +/abc\K/aftertext,startchar + abcdef + 0: abc + ^^^ + 0+ def + abcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def + xyzabcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def +\= Expect no match + abcdef\=notempty +No match + xyzabcdef\=notempty +No match + +/^(?:(?=abc)|abc\K)/aftertext,startchar + abcdef + 0: + 0+ abcdef + abcdef\=notempty_atstart + 0: abc + ^^^ + 0+ def +\= Expect no match + abcdef\=notempty +No match + +/a?b?/aftertext + xyz + 0: + 0+ xyz + xyzabc + 0: + 0+ xyzabc + xyzabc\=notempty + 0: ab + 0+ c + xyzabc\=notempty_atstart + 0: + 0+ yzabc + xyz\=notempty_atstart + 0: + 0+ yz +\= Expect no match + 
xyz\=notempty +No match + +/^a?b?/aftertext + xyz + 0: + 0+ xyz + xyzabc + 0: + 0+ xyzabc +\= Expect no match + xyzabc\=notempty +No match + xyzabc\=notempty_atstart +No match + xyz\=notempty_atstart +No match + xyz\=notempty +No match + +/^(?a|b\gc)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(?a|b\g'name'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g<1>c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g'1'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/^(a|b\g'-1'c)/ + aaaa + 0: a + 1: a + bacxxx + 0: bac + 1: bac + bbaccxxx + 0: bbacc + 1: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + +/(^(a|b\g<-1>c))/ + aaaa + 0: a + 1: a + 2: a + bacxxx + 0: bac + 1: bac + 2: bac + bbaccxxx + 0: bbacc + 1: bbacc + 2: bbacc + bbbacccxx + 0: bbbaccc + 1: bbbaccc + 2: bbbaccc + +/(?-i:\g)(?i:(?a))/ + XaaX + 0: aa + 1: a + XAAX + 0: AA + 1: A + +/(?i:\g)(?-i:(?a))/ + XaaX + 0: aa + 1: a +\= Expect no match + XAAX +No match + +/(?-i:\g<+1>)(?i:(a))/ + XaaX + 0: aa + 1: a + XAAX + 0: AA + 1: A + +/(?=(?(?#simplesyntax)\$(?[a-zA-Z_\x{7f}-\x{ff}][a-zA-Z0-9_\x{7f}-\x{ff}]*)(?:\[(?[a-zA-Z0-9_\x{7f}-\x{ff}]+|\$\g)\]|->\g(\(.*?\))?)?|(?#simple syntax withbraces)\$\{(?:\g(?\[(?:\g|'(?:\\.|[^'\\])*'|"(?:\g|\\.|[^"\\])*")\])?|\g|\$\{\g\})\}|(?#complexsyntax)\{(?\$(?\g(\g*|\(.*?\))?)(?:->\g)*|\$\g|\$\{\g\})\}))\{/ + +/(?a|b|c)\g*/ + abc + 0: abc + 1: a + accccbbb + 0: accccbbb + 1: a + +/^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/ + XYabcdY + 0: XYabcdY + 1: a + 2: b + 3: + 4: + 5: c + 6: d + 7: Y + +/(?<=b(?1)|zzz)(a)/ + xbaax + 0: a + 1: a + xzzzax + 0: a + 1: a + +/(a)(?<=b\1)/ + +/(a)(?<=b+(?1))/ +Failed: error 125 at offset 3: length of lookbehind assertion is not 
limited + +/(a+)(?<=b(?1))/ +Failed: error 125 at offset 4: length of lookbehind assertion is not limited + +/(a(?<=b(?1)))/ +Failed: error 125 at offset 2: length of lookbehind assertion is not limited + +/(?<=b(?1))xyz/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(?<=b(?1))xyz(b+)pqrstuvew/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(a|bc)\1/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bc)\1{2,3}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 3 + +/(a|bc)(?1)/I +Capture group count = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|b\1)(a|b\1)/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|b\1){2}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bbbb\1)(a|bbbb\1)/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/(a|bbbb\1){2}/I +Capture group count = 1 +Max back reference = 1 +Starting code units: a b +Subject length lower bound = 2 + +/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'F' +Last code unit = ':' +Subject length lower bound = 22 + +/]{0,})>]{0,})>([\d]{0,}\.)(.*)((
([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/Iis +Capture group count = 11 +Options: caseless dotall +First code unit = '<' +Last code unit = '>' +Subject length lower bound = 47 + +"(?>.*/)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 4 + +/(?(?=[^a-z]+[a-z]) \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) /Ix +Capture group count = 0 +Options: extended +Last code unit = '-' +Subject length lower bound = 8 + +/(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/Ii +Capture group count = 1 +Options: caseless +Starting code units: A B C a b c +Subject length lower bound = 1 + +/(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/I +Capture group count = 0 +Starting code units: c d +Last code unit = 'b' +Subject length lower bound = 41 + +/A)|(?
B))/I +Capture group count = 1 +Named capture groups: + a 1 +Starting code units: A B +Subject length lower bound = 1 + AB\=copy=a + 0: A + 1: A + C A (1) a (group 1) + BA\=copy=a + 0: B + 1: B + C B (1) a (group 1) + +/(?|(?A)|(?B))/ +Failed: error 165 at offset 16: different names for subpatterns of the same number are not allowed + +/(?:a(? (?')|(?")) | + b(? (?')|(?")) ) + (?('quote')[a-z]+|[0-9]+)/Ix,dupnames +Capture group count = 6 +Max back reference = 4 +Named capture groups: + apostrophe 2 + apostrophe 5 + quote 1 + quote 4 + realquote 3 + realquote 6 +Options: dupnames extended +Starting code units: a b +Subject length lower bound = 3 + a"aaaaa + 0: a"aaaaa + 1: " + 2: + 3: " + b"aaaaa + 0: b"aaaaa + 1: + 2: + 3: + 4: " + 5: + 6: " +\= Expect no match + b"11111 +No match + a"11111 +No match + +/(?:a(?[0-5])|b(?[4-7]))c(?()d|e)/B,dupnames +------------------------------------------------------------------ + Bra + Bra + a + CBra 1 + [0-5] + Ket + Alt + b + CBra 2 + [4-7] + Ket + Ket + c + Cond + Capture ref 2 + d + Alt + e + Ket + Ket + End +------------------------------------------------------------------ + a4cd + 0: a4cd + 1: 4 + b4cd + 0: b4cd + 1: + 2: 4 +\= Expect no match + a6cd +No match + a6ce +No match + +/^(?|(a)(b)(c)(?d)|(?e)) (?('D')X|Y)/IBx,dupnames +------------------------------------------------------------------ + Bra + ^ + Bra + CBra 1 + a + Ket + CBra 2 + b + Ket + CBra 3 + c + Ket + CBra 4 + d + Ket + Alt + CBra 1 + e + Ket + Ket + Cond + Capture ref 2 + X + Alt + Y + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 4 +Max back reference = 4 +Named capture groups: + D 4 + D 1 +Compile options: dupnames extended +Overall options: anchored dupnames extended +Starting code units: a e +Subject length lower bound = 2 + abcdX + 0: abcdX + 1: a + 2: b + 3: c + 4: d + eX + 0: eX + 1: e +\= Expect no match + abcdY +No match + ey +No match + +/(?a) (b)(c) (?d (?(R&A)$ | (?4)) 
)/IBx,dupnames +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + CBra 2 + b + Ket + CBra 3 + c + Ket + CBra 4 + d + Cond + Cond recurse 2 + $ + Alt + Recurse + Ket + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 4 +Max back reference = 4 +Named capture groups: + A 1 + A 4 +Options: dupnames extended +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 + abcdd + 0: abcdd + 1: a + 2: b + 3: c + 4: dd +\= Expect no match + abcdde +No match + +/abcd*/ + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + +/abcd*/i + xxxxabcd\=ps + 0: abcd + xxxxabcd\=ph +Partial match: abcd + XXXXABCD\=ps + 0: ABCD + XXXXABCD\=ph +Partial match: ABCD + +/abc\d*/ + xxxxabc1\=ps + 0: abc1 + xxxxabc1\=ph +Partial match: abc1 + +/(a)bc\1*/ + xxxxabca\=ps + 0: abca + 1: a + xxxxabca\=ph +Partial match: abca + +/abc[de]*/ + xxxxabcde\=ps + 0: abcde + xxxxabcde\=ph +Partial match: abcde + +/(\3)(\1)(a)/allow_empty_class,match_unset_backref,dupnames + cat + 0: a + 1: + 2: + 3: a + +/(\3)(\1)(a)/I,allow_empty_class,match_unset_backref,dupnames +Capture group count = 3 +Max back reference = 3 +Options: allow_empty_class dupnames match_unset_backref +Last code unit = 'a' +Subject length lower bound = 1 + cat + 0: a + 1: + 2: + 3: a + +/(\3)(\1)(a)/I +Capture group count = 3 +Max back reference = 3 +Last code unit = 'a' +Subject length lower bound = 3 +\= Expect no match + cat +No match + +/i(?(DEFINE)(?a))/I +Capture group count = 1 +Named capture groups: + s 1 +First code unit = 'i' +Subject length lower bound = 1 + i + 0: i + +/()i(?(1)a)/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'i' +Subject length lower bound = 1 + ia + 0: ia + 1: + +/(?i)a(?-i)b|c/B +------------------------------------------------------------------ + Bra + /i a + b + Alt + c + Ket + End +------------------------------------------------------------------ + XabX 
+ 0: ab + XAbX + 0: Ab + CcC + 0: c +\= Expect no match + XABX +No match + +/(?i)a(?s)b|c/B +------------------------------------------------------------------ + Bra + /i ab + Alt + /i c + Ket + End +------------------------------------------------------------------ + +/(?i)a(?s-i)b|c/B +------------------------------------------------------------------ + Bra + /i a + b + Alt + c + Ket + End +------------------------------------------------------------------ + +/^(ab(c\1)d|x){2}$/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + ab + CBra 2 + c + \1 + Ket + d + Alt + x + Ket + CBra 1 + ab + CBra 2 + c + \1 + Ket + d + Alt + x + Ket + $ + Ket + End +------------------------------------------------------------------ + xabcxd + 0: xabcxd + 1: abcxd + 2: cx + +/^(?&t)*+(?(DEFINE)(?.))$/B +------------------------------------------------------------------ + Bra + ^ + Braposzero + SBraPos + Recurse + KetRpos + Cond + Cond false + CBra 1 + Any + Ket + Ket + $ + Ket + End +------------------------------------------------------------------ + +/^(?&t)*(?(DEFINE)(?.))$/B +------------------------------------------------------------------ + Bra + ^ + Brazero + SBra + Recurse + KetRmax + Cond + Cond false + CBra 1 + Any + Ket + Ket + $ + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives the match as "b" rather than "ab". I +# believe this to be a Perl bug. + +/(?>a\Kb)z|(ab)/ + ab\=startchar + 0: ab + 1: ab + +/(?P(?P0|)|(?P>L2)(?P>L1))/ + abcd + 0: + 1: + 2: + 0abc + 0: 0 + 1: 0 + 2: 0 + +/abc(*MARK:)pqr/ +Failed: error 166 at offset 10: (*MARK) must have an argument + +/abc(*:)pqr/ +Failed: error 166 at offset 6: (*MARK) must have an argument + +/(*COMMIT:X)/B +------------------------------------------------------------------ + Bra + *COMMIT X + Ket + End +------------------------------------------------------------------ + +# This should, and does, fail. 
In Perl, it does not, which I think is a +# bug because replacing the B in the pattern by (B|D) does make it fail. +# Turning off Perl's optimization by inserting (??{""}) also makes it fail. + +/A(*COMMIT)B/aftertext,mark +\= Expect no match + ACABX +No match + +# These should be different, but in Perl they are not, which I think +# is a bug in Perl. + +/A(*THEN)B|A(*THEN)C/mark + AC + 0: AC + +/A(*PRUNE)B|A(*PRUNE)C/mark +\= Expect no match + AC +No match + +# Mark names can be duplicated. Perl doesn't give a mark for this one, +# though PCRE2 does. + +/^A(*:A)B|^X(*:A)Y/mark +\= Expect no match + XAQQ +No match, mark = A + +# COMMIT at the start of a pattern should be the same as an anchor. Perl +# optimizations defeat this. So does the PCRE2 optimization unless we disable +# it. + +/(*COMMIT)ABC/ + ABCDEFG + 0: ABC + +/(*COMMIT)ABC/no_start_optimize +\= Expect no match + DEFGABC +No match + +/^(ab (c+(*THEN)cd) | xyz)/x +\= Expect no match + abcccd +No match + +/^(ab (c+(*PRUNE)cd) | xyz)/x +\= Expect no match + abcccd +No match + +/^(ab (c+(*FAIL)cd) | xyz)/x +\= Expect no match + abcccd +No match + +# Perl gets some of these wrong + +/(?>.(*ACCEPT))*?5/ + abcde + 0: a + +/(.(*ACCEPT))*?5/ + abcde + 0: a + 1: a + +/(.(*ACCEPT))5/ + abcde + 0: a + 1: a + +/(.(*ACCEPT))*5/ + abcde + 0: a + 1: a + +/A\NB./B +------------------------------------------------------------------ + Bra + A + Any + B + Any + Ket + End +------------------------------------------------------------------ + ACBD + 0: ACBD +\= Expect no match + A\nB +No match + ACB\n +No match + +/A\NB./Bs +------------------------------------------------------------------ + Bra + A + Any + B + AllAny + Ket + End +------------------------------------------------------------------ + ACBD + 0: ACBD + ACB\n + 0: ACB\x0a +\= Expect no match + A\nB +No match + +/A\NB/newline=crlf + A\nB + 0: A\x0aB + A\rB + 0: A\x0dB +\= Expect no match + A\r\nB +No match + +/\R+b/B 
+------------------------------------------------------------------ + Bra + \R++ + b + Ket + End +------------------------------------------------------------------ + +/\R+\n/B +------------------------------------------------------------------ + Bra + \R+ + \x0a + Ket + End +------------------------------------------------------------------ + +/\R+\d/B +------------------------------------------------------------------ + Bra + \R++ + \d + Ket + End +------------------------------------------------------------------ + +/\d*\R/B +------------------------------------------------------------------ + Bra + \d*+ + \R + Ket + End +------------------------------------------------------------------ + +/\s*\R/B +------------------------------------------------------------------ + Bra + \s* + \R + Ket + End +------------------------------------------------------------------ + \x20\x0a + 0: \x0a + \x20\x0d + 0: \x0d + \x20\x0d\x0a + 0: \x0d\x0a + +/\S*\R/B +------------------------------------------------------------------ + Bra + \S*+ + \R + Ket + End +------------------------------------------------------------------ + a\x0a + 0: a\x0a + +/X\h*\R/B +------------------------------------------------------------------ + Bra + X + \h*+ + \R + Ket + End +------------------------------------------------------------------ + X\x20\x0a + 0: X \x0a + +/X\H*\R/B +------------------------------------------------------------------ + Bra + X + \H* + \R + Ket + End +------------------------------------------------------------------ + X\x0d\x0a + 0: X\x0d\x0a + +/X\H+\R/B +------------------------------------------------------------------ + Bra + X + \H+ + \R + Ket + End +------------------------------------------------------------------ + X\x0d\x0a + 0: X\x0d\x0a + +/X\H++\R/B +------------------------------------------------------------------ + Bra + X + \H++ + \R + Ket + End +------------------------------------------------------------------ +\= Expect no match + X\x0d\x0a +No match + 
+/(?<=abc)def/ + abc\=ph +Partial match: + +/abc$/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc$/m + abc + 0: abc + abc\n + 0: abc + abc\=ph +Partial match: abc + abc\n\=ph + 0: abc + abc\=ps + 0: abc + abc\n\=ps + 0: abc + +/abc\z/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\Z/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\b/ + abc + 0: abc + abc\=ps + 0: abc + abc\=ph +Partial match: abc + +/abc\B/ + abc\=ps +Partial match: abc + abc\=ph +Partial match: abc +\= Expect no match + abc +No match + +/.+/ +\= Bad offsets + abc\=offset=4 +Failed: error -33: bad offset value + abc\=offset=-4 +** Invalid value in "offset=-4" +\= Valid data + abc\=offset=0 + 0: abc + abc\=offset=1 + 0: bc + abc\=offset=2 + 0: c +\= Expect no match + abc\=offset=3 +No match + +/^\cÄ£/ +Failed: error 168 at offset 3: \c must be followed by a printable ASCII character + +/(?P(?P=abn)xxx)/B +------------------------------------------------------------------ + Bra + CBra 1 + \1 + xxx + Ket + Ket + End +------------------------------------------------------------------ + +/(a\1z)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + \1 + z + Ket + Ket + End +------------------------------------------------------------------ + +/(?P(?P=abn)(?(?P=axn)xxx)/B +Failed: error 115 at offset 12: reference to non-existent subpattern + +/(?P(?P=axn)xxx)(?yy)/B +------------------------------------------------------------------ + Bra + CBra 1 + \2 + xxx + Ket + CBra 2 + yy + Ket + Ket + End +------------------------------------------------------------------ + +# These tests are here because Perl gets the first one wrong. 
+ +/(\R*)(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + +/(\R)*(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0a + 2: \x0d + +/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s + \r\n + 0: \x0d + 1: + 2: \x0d + \r\r\n\n\r + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + \r\r\n\n\r\n + 0: \x0d\x0d\x0a\x0a\x0d + 1: \x0d\x0d\x0a\x0a + 2: \x0d + +# ------------- + +/^abc$/B +------------------------------------------------------------------ + Bra + ^ + abc + $ + Ket + End +------------------------------------------------------------------ + +/^abc$/Bm +------------------------------------------------------------------ + Bra + /m ^ + abc + /m $ + Ket + End +------------------------------------------------------------------ + +/^(a)*+(\w)/ + aaaaX + 0: aaaaX + 1: a + 2: X +\= Expect no match + aaaa +No match + +/^(?:a)*+(\w)/ + aaaaX + 0: aaaaX + 1: X +\= Expect no match + aaaa +No match + +/(a)++1234/IB +------------------------------------------------------------------ + Bra + CBraPos 1 + a + KetRpos + 1234 + Ket + End +------------------------------------------------------------------ +Capture group count = 1 +First code unit = 'a' +Last code unit = '4' +Subject length lower bound = 5 + +/([abc])++1234/I +Capture group count = 1 +Starting code units: a b c +Last code unit = '4' +Subject length lower bound = 5 + +/(?<=(abc)+)X/ +Failed: error 125 at offset 0: length of lookbehind assertion is not limited + +/(^ab)/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(^ab)++/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(^ab|^)+/I +Capture group count = 1 +May match empty string 
+Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(^ab|^)++/I +Capture group count = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?:^ab)/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(?:^ab)++/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 2 + +/(?:^ab|^)+/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?:^ab|^)++/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(.*ab)/I +Capture group count = 1 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(.*ab)++/I +Capture group count = 1 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(.*ab|.*)+/I +Capture group count = 1 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(.*ab|.*)++/I +Capture group count = 1 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?:.*ab)/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(?:.*ab)++/I +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'b' +Subject length lower bound = 2 + +/(?:.*ab|.*)+/I +Capture group count = 0 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?:.*ab|.*)++/I +Capture group count = 0 +May match empty string +First code unit at start or follows newline +Subject length lower bound = 0 + +/(?=a)[bcd]/I +Capture group count = 0 +First code unit = 'a' +Subject 
length lower bound = 1 + +/((?=a))[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=a))+[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/((?=a))++[bcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/(?=a+)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a+?)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a++)[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'a' (caseless) +Subject length lower bound = 1 + +/(?=a{3})[bcd]/Ii +Capture group count = 0 +Options: caseless +First code unit = 'A' (caseless) +Subject length lower bound = 1 + +/(abc)\1+/ + +# Perl doesn't get these right IMO (the 3rd is PCRE2-specific) + +/(?1)(?:(b(*ACCEPT))){0}/ + b + 0: b + +/(?1)(?:(b(*ACCEPT))){0}c/ + bc + 0: bc +\= Expect no match + b +No match + +/(?1)(?:((*ACCEPT))){0}c/ + c + 0: c + c\=notempty + 0: c + +/^.*?(?(?=a)a|b(*THEN)c)/ +\= Expect no match + ba +No match + +/^.*?(?(?=a)a|bc)/ + ba + 0: ba + +/^.*?(?(?=a)a(*THEN)b|c)/ +\= Expect no match + ac +No match + +/^.*?(?(?=a)a(*THEN)b)c/ +\= Expect no match + ac +No match + +/^.*?(a(*THEN)b)c/ +\= Expect no match + aabc +No match + +/^.*? (?1) c (?(DEFINE)(a(*THEN)b))/x + aabc + 0: aabc + +/^.*?(a(*THEN)b|z)c/ + aabc + 0: aabc + 1: ab + +/^.*?(z|a(*THEN)b)c/ + aabc + 0: aabc + 1: ab + +# These are here because they are not Perl-compatible; the studying means the +# mark is not seen. 
+ +/(*MARK:A)(*SKIP:B)(C|X)/mark + C + 0: C + 1: C +MK: A +\= Expect no match + D +No match, mark = A + +/(*:A)A+(*SKIP:A)(B|Z)/mark +\= Expect no match + AAAC +No match, mark = A + +# ---------------------------- + +"(?=a*(*ACCEPT)b)c" + c + 0: c + c\=notempty + 0: c + +/(?1)c(?(DEFINE)((*ACCEPT)b))/ + c + 0: c + c\=notempty + 0: c + +/(?>(*ACCEPT)b)c/ + c + 0: +\= Expect no match + c\=notempty +No match + +/(?:(?>(a)))+a%/allaftertext + %aa% + 0: aa% + 0+ + 1: a + 1+ a% + +/(a)b|ac/allaftertext + ac\=ovector=1 + 0: ac + 0+ + +/(a)(b)x|abc/allaftertext + abc\=ovector=2 + 0: abc + 0+ + +/(a)bc|(a)(b)\2/ + abc\=ovector=1 +Matched, but too many substrings + 0: abc + abc\=ovector=2 + 0: abc + 1: a + aba\=ovector=1 +Matched, but too many substrings + 0: aba + aba\=ovector=2 +Matched, but too many substrings + 0: aba + 1: + aba\=ovector=3 +Matched, but too many substrings + 0: aba + 1: + 2: a + aba\=ovector=4 + 0: aba + 1: + 2: a + 3: b + +/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I +Capture group count = 2 +Starting code units: a b +Subject length lower bound = 3 + +/(a(?2)|b)(b(?1)|a)(?1)(?2)/I +Capture group count = 2 +Starting code units: a b +Subject length lower bound = 4 + +/(abc)(?1)/I +Capture group count = 1 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 6 + +/(?:(foo)|(bar)|(baz))X/allcaptures + bazfooX + 0: fooX + 1: foo + 2: + 3: + foobazbarX + 0: barX + 1: + 2: bar + 3: + barfooX + 0: fooX + 1: foo + 2: + 3: + bazX + 0: bazX + 1: + 2: + 3: baz + foobarbazX + 0: bazX + 1: + 2: + 3: baz + bazfooX\=ovector=0 + 0: fooX + 1: foo + 2: + 3: + bazfooX\=ovector=1 +Matched, but too many substrings + 0: fooX + bazfooX\=ovector=2 + 0: fooX + 1: foo + bazfooX\=ovector=3 + 0: fooX + 1: foo + 2: + +/(?=abc){3}abc/B +------------------------------------------------------------------ + Bra + Assert + abc + Ket + Assert + abc + 
Ket + Assert + abc + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc)+abc/B +------------------------------------------------------------------ + Bra + Assert + abc + Ket + Brazero + Assert + abc + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc)++abc/B +------------------------------------------------------------------ + Bra + Once + Assert + abc + Ket + Brazero + Assert + abc + Ket + Ket + abc + Ket + End +------------------------------------------------------------------ + +/(?=abc){0}xyz/B +------------------------------------------------------------------ + Bra + Skip zero + Assert + abc + Ket + xyz + Ket + End +------------------------------------------------------------------ + +/(?=(a))?./B +------------------------------------------------------------------ + Bra + Brazero + Assert + CBra 1 + a + Ket + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?=(a))??./B +------------------------------------------------------------------ + Bra + Braminzero + Assert + CBra 1 + a + Ket + Ket + Any + Ket + End +------------------------------------------------------------------ + +/^(?=(a)){0}b(?1)/B +------------------------------------------------------------------ + Bra + ^ + Skip zero + Assert + CBra 1 + a + Ket + Ket + b + Recurse + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)(a))?b(?1)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a + Ket + Ket + b + Recurse + Ket + End +------------------------------------------------------------------ + +/^(?=(?1))?[az]([abc])d/B +------------------------------------------------------------------ + Bra + ^ + Brazero + Assert + Recurse + Ket + [az] + CBra 1 + [a-c] + Ket + d + Ket + End +------------------------------------------------------------------ + +/^(?!a){0}\w+/B 
+------------------------------------------------------------------ + Bra + ^ + Skip zero + Assert not + a + Ket + \w++ + Ket + End +------------------------------------------------------------------ + +/(?<=(abc))?xyz/B +------------------------------------------------------------------ + Bra + Brazero + Assert back + Reverse + CBra 1 + abc + Ket + Ket + xyz + Ket + End +------------------------------------------------------------------ + +/[:a[:abc]b:]/B +------------------------------------------------------------------ + Bra + [:[a-c] + b:] + Ket + End +------------------------------------------------------------------ + +/^(a(*:A)(d|e(*:B))z|aeq)/auto_callout + adz +--->adz + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++10 ^ ^ | ++18 ^ ^ z ++19 ^ ^ | ++24 ^ ^ End of pattern + 0: adz + 1: adz + 2: d + aez +--->aez + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++11 ^^ e ++12 ^ ^ (*:B) ++17 ^ ^ ) +Latest Mark: B ++18 ^ ^ z ++19 ^ ^ | ++24 ^ ^ End of pattern + 0: aez + 1: aez + 2: e + aeqwerty +--->aeqwerty + +0 ^ ^ + +1 ^ ( + +2 ^ a + +3 ^^ (*:A) + +8 ^^ ( +Latest Mark: A + +9 ^^ d ++11 ^^ e ++12 ^ ^ (*:B) ++17 ^ ^ ) +Latest Mark: B ++18 ^ ^ z ++20 ^ a ++21 ^^ e ++22 ^ ^ q ++23 ^ ^ ) ++24 ^ ^ End of pattern + 0: aeq + 1: aeq + +/.(*F)/ +\= Expect no match + abc\=ph +No match + +/\btype\b\W*?\btext\b\W*?\bjavascript\b/I +Capture group count = 0 +Max lookbehind = 1 +First code unit = 't' +Last code unit = 't' +Subject length lower bound = 18 + +/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|a+)(?>(z+))\w/B +------------------------------------------------------------------ + Bra + ^ + Once + a++ + Ket + Once + CBra 1 + z++ + Ket + Ket + \w + Ket + End +------------------------------------------------------------------ + aaaazzzzb + 0: aaaazzzzb + 1: zzzz +\= Expect no match + aazz +No match + +/(.)(\1|a(?2))/ + bab + 0: bab + 1: b + 2: ab + +/\1|(.)(?R)\1/ + cbbbc + 0: cbbbc + 1: c + 
+/(.)((?(1)c|a)|a(?2))/ +\= Expect no match + baa +No match + +/(?P(?P=abn)xxx)/B +------------------------------------------------------------------ + Bra + CBra 1 + \1 + xxx + Ket + Ket + End +------------------------------------------------------------------ + +/(a\1z)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + \1 + z + Ket + Ket + End +------------------------------------------------------------------ + +/^a\x41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz +\= Expect no match + ax41z +No match + +/^a[m\x41]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz + +/^a\x1z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + ax1z + 0: ax1z + +/^a\u0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz +\= Expect no match + au0041z +No match + +/^a[m\u0041]z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aAz + 0: aAz + +/^a\u041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + au041z + 0: au041z +\= Expect no match + aAz +No match + +/^a\U0041z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aU0041z + 0: aU0041z +\= Expect no match + aAz +No match + +/^\u{7a}/alt_bsux + u{7a} + 0: u{7a} +\= Expect no match + zoo +No match + +/^\u{7a}/extra_alt_bsux + zoo + 0: z + +/\u{}/extra_alt_bsux + u{} + 0: u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + 0: u{Q12} + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + 0: u{ 12} + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + 0: u{{{} + +/(?(?=c)c|d)++Y/B +------------------------------------------------------------------ + Bra + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End +------------------------------------------------------------------ + +/(?(?=c)c|d)*+Y/B +------------------------------------------------------------------ + Bra + Braposzero + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End 
+------------------------------------------------------------------ + +/a[\NB]c/ +Failed: error 171 at offset 4: \N is not supported in a class + aNc + +/a[B-\Nc]/ +Failed: error 171 at offset 6: \N is not supported in a class + +/a[B\Nc]/ +Failed: error 171 at offset 5: \N is not supported in a class + +/(a)(?2){0,1999}?(b)/ + +/(a)(?(DEFINE)(b))(?2){0,1999}?(?2)/ + +# This test, with something more complicated than individual letters, causes +# different behaviour in Perl. Perhaps it disables some optimization; no tag is +# passed back for the failures, whereas in PCRE2 there is a tag. + +/(A|P)(*:A)(B|P) | (X|P)(X|P)(*:B)(Y|P)/x,mark + AABC + 0: AB + 1: A + 2: B +MK: A + XXYZ + 0: XXY + 1: + 2: + 3: X + 4: X + 5: Y +MK: B +\= Expect no match + XAQQ +No match, mark = A + XAQQXZZ +No match, mark = A + AXQQQ +No match, mark = A + AXXQQQ +No match, mark = B + +# Perl doesn't give marks for these, though it does if the alternatives are +# replaced by single letters. + +/(b|q)(*:m)f|a(*:n)w/mark + aw + 0: aw +MK: n +\= Expect no match + abc +No match, mark = m + +/(q|b)(*:m)f|a(*:n)w/mark + aw + 0: aw +MK: n +\= Expect no match + abc +No match, mark = m + +# After a partial match, the behaviour is as for a failure. + +/^a(*:X)bcde/mark + abc\=ps +Partial match, mark=X: abc + +# These are here because Perl doesn't return a mark, except for the first. 
+ +/(?=(*:x))(q|)/aftertext,mark + abc + 0: + 0+ abc + 1: +MK: x + +/(?=(*:x))((*:y)q|)/aftertext,mark + abc + 0: + 0+ abc + 1: +MK: x + +/(?=(*:x))(?:(*:y)q|)/aftertext,mark + abc + 0: + 0+ abc +MK: x + +/(?=(*:x))(?>(*:y)q|)/aftertext,mark + abc + 0: + 0+ abc +MK: x + +/(?=a(*:x))(?!a(*:y)c)/aftertext,mark + ab + 0: + 0+ ab +MK: x + +/(?=a(*:x))(?=a(*:y)c|)/aftertext,mark + ab + 0: + 0+ ab +MK: x + +/(..)\1/ + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps + 0: abab + 1: ab + +/(..)\1/i + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps + 0: aBAb + 1: aB + +/(..)\1{2,}/ + ab\=ps +Partial match: ab + aba\=ps +Partial match: aba + abab\=ps +Partial match: abab + ababa\=ps +Partial match: ababa + ababab\=ps + 0: ababab + 1: ab + ababab\=ph +Partial match: ababab + abababa\=ps + 0: ababab + 1: ab + abababa\=ph +Partial match: abababa + +/(..)\1{2,}/i + ab\=ps +Partial match: ab + aBa\=ps +Partial match: aBa + aBAb\=ps +Partial match: aBAb + AbaBA\=ps +Partial match: AbaBA + abABAb\=ps + 0: abABAb + 1: ab + aBAbaB\=ph +Partial match: aBAbaB + abABabA\=ps + 0: abABab + 1: ab + abaBABa\=ph +Partial match: abaBABa + +/(..)\1{2,}?x/i + ab\=ps +Partial match: ab + abA\=ps +Partial match: abA + aBAb\=ps +Partial match: aBAb + abaBA\=ps +Partial match: abaBA + abAbaB\=ps +Partial match: abAbaB + abaBabA\=ps +Partial match: abaBabA + abAbABaBx\=ps + 0: abAbABaBx + 1: ab + +/^(..)\1/ + aba\=ps +Partial match: aba + +/^(..)\1{2,3}x/ + aba\=ps +Partial match: aba + ababa\=ps +Partial match: ababa + ababa\=ph +Partial match: ababa + abababx + 0: abababx + 1: ab + ababababx + 0: ababababx + 1: ab + +/^(..)\1{2,3}?x/ + aba\=ps +Partial match: aba + ababa\=ps +Partial match: ababa + ababa\=ph +Partial match: ababa + abababx + 0: abababx + 1: ab + ababababx + 0: ababababx + 1: ab + +/^(..)(\1{2,3})ab/ + abababab + 0: abababab + 1: ab + 2: abab + +/^\R/ + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/^\R{2,3}x/ + \r\=ps +Partial match: \x0d + 
\r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R{2,3}?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps +Partial match: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps +Partial match: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + \r\rx + 0: \x0d\x0dx + \r\r\rx + 0: \x0d\x0d\x0dx + +/^\R?x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + x + 0: x + \rx + 0: \x0dx + +/^\R+x/ + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\n\=ps +Partial match: \x0d\x0a + \r\n\=ph +Partial match: \x0d\x0a + \rx + 0: \x0dx + +/^a$/newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^a$/m,newline=crlf + a\r\=ps +Partial match: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/newline=crlf + a\r\=ps + 0: a\x0d + 1: a\x0d + a\r\=ph +Partial match: a\x0d + +/^(a$|a\r)/m,newline=crlf + a\r\=ps + 0: a\x0d + 1: a\x0d + a\r\=ph +Partial match: a\x0d + +/./newline=crlf + \r\=ps + 0: \x0d + \r\=ph +Partial match: \x0d + +/.{2,3}/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d\x0d + \r\r\r\=ph +Partial match: \x0d\x0d\x0d + +/.{2,3}?/newline=crlf + \r\=ps +Partial match: \x0d + \r\=ph +Partial match: \x0d + \r\r\=ps + 0: \x0d\x0d + \r\r\=ph +Partial match: \x0d\x0d + \r\r\r\=ps + 0: \x0d\x0d + \r\r\r\=ph + 0: \x0d\x0d + +"AB(C(D))(E(F))?(?(?=\2)(?=\4))" + ABCDGHI\=ovector=01 +Matched, but too many substrings + 0: ABCD + +# These are all run as real matches in test 1; here we are just checking the +# settings of the anchored and startline bits. 
+ +/(?>.*?a)(?<=ba)/I +Capture group count = 0 +Max lookbehind = 2 +Last code unit = 'a' +Subject length lower bound = 1 + +/(?:.*?a)(?<=ba)/I +Capture group count = 0 +Max lookbehind = 2 +First code unit at start or follows newline +Last code unit = 'a' +Subject length lower bound = 1 + +/.*?a(*PRUNE)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/.*?a(*PRUNE)b/Is +Capture group count = 0 +Options: dotall +Last code unit = 'b' +Subject length lower bound = 2 + +/^a(*PRUNE)b/Is +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +First code unit = 'a' +Subject length lower bound = 2 + +/.*?a(*SKIP)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>.*?a)b/Is +Capture group count = 0 +Options: dotall +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>.*?a)b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 2 + +/(?>^a)b/Is +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +First code unit = 'a' +Subject length lower bound = 2 + +/(?>.*?)(?<=(abcd)|(wxyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + +/(?>.*)(?<=(abcd)|(wxyz))/I +Capture group count = 2 +Max lookbehind = 4 +May match empty string +Subject length lower bound = 0 + +"(?>.*)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 3 + +"(?>.*?)foo"I +Capture group count = 0 +Last code unit = 'o' +Subject length lower bound = 3 + +/(?>^abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + +/(?>.*abc)/Im +Capture group count = 0 +Options: multiline +Last code unit = 'c' +Subject length lower bound = 3 + +/(?:.*abc)/Im +Capture group count = 0 +Options: multiline +First code unit at start or follows newline +Last code unit = 'c' +Subject length 
lower bound = 3 + +/(?:(a)+(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(a)++(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(?>(a))(?C1)bb|aa(?C2)b)/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(?:(?1)(?C1)x|ab(?C2))((a)){0}/ + aab\=callout_capture +Callout 1: last capture = 0 +--->aab + ^^ x +Callout 1: last capture = 0 +--->aab + ^^ x +Callout 2: last capture = 0 +--->aab + ^ ^ ) + 0: ab + +/(?1)(?C1)((a)(?C2)){0}/ + aab\=callout_capture +Callout 2: last capture = 2 + 1: + 2: a +--->aab + ^^ ){0} +Callout 1: last capture = 0 +--->aab + ^^ ( + 0: a + +/(?:(a)+(?C1)bb|aa(?C2)b)++/ + aab\=callout_capture +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + aab\=callout_capture,ovector=1 +Callout 1: last capture = 1 + 1: a +--->aab + ^ ^ b +Callout 1: last capture = 1 + 1: a +--->aab + ^^ b +Callout 2: last capture = 0 +--->aab + ^ ^ b + 0: aab + +/(ab)x|ab/ + ab\=ovector=0 + 0: ab + ab\=ovector=1 + 0: ab + +/(?<=123)(*MARK:xx)abc/mark + xxxx123a\=ph +Partial match, mark=xx: a + xxxx123a\=ps +Partial match, mark=xx: a + +/123\Kabc/startchar + xxxx123a\=ph +Partial match: 123a + xxxx123a\=ps +Partial match: 123a + +/^(?(?=a)aa|bb)/auto_callout + bb +--->bb + +0 ^ ^ + +1 ^ (? + +3 ^ (?= + +6 ^ a ++11 ^ b ++12 ^^ b ++13 ^ ^ ) ++14 ^ ^ End of pattern + 0: bb + +/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/ + bb +--->bb + 1 ^ ^ + 2 ^ (? + 99 ^ (?= + 3 ^ a + 8 ^ b + 9 ^^ b + 10 ^ ^ ) + 11 ^ ^ End of pattern + 0: bb + +# Perl seems to have a bug with this one. 
+ +/aaaaa(*COMMIT)(*PRUNE)b|a+c/ + aaaaaac + 0: aaaac + +# Here are some that Perl treats differently because of the way it handles +# backtracking verbs. + +/(?!a(*COMMIT)b)ac|ad/ + ac + 0: ac + ad + 0: ad + +/^(?!a(*THEN)b|ac)../ + ad + 0: ad +\= Expect no match + ac +No match + +/^(?=a(*THEN)b|ac)/ + ac + 0: + +/\A.*?(?:a|b(*THEN)c)/ + ba + 0: ba + +/\A.*?(?:a|b(*THEN)c)++/ + ba + 0: ba + +/\A.*?(?:a|b(*THEN)c|d)/ + ba + 0: ba + +/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/ + aac + 0: aac + +/\A.*?(a|b(*THEN)c)/ + ba + 0: ba + 1: a + +/^(A(*THEN)B|A(*THEN)D)/ + AD + 0: AD + 1: AD + +/(?!b(*THEN)a)bn|bnn/ + bnn + 0: bn + +/(?(?=b(*SKIP)a)bn|bnn)/ + bnn + 0: bnn + +/(?=b(*THEN)a|)bn|bnn/ + bnn + 0: bn + +# This test causes a segfault with Perl 5.18.0 + +/^(?=(a)){0}b(?1)/ + backgammon + 0: ba + +/(?|(?f)|(?b))/I,dupnames +Capture group count = 1 +Named capture groups: + n 1 +Options: dupnames +Starting code units: b f +Subject length lower bound = 1 + +/(?abc)(?z)\k()/IB,dupnames +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + CBra 2 + z + Ket + \k2 + CBra 3 + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 3 +Max back reference = 2 +Named capture groups: + a 1 + a 2 +Options: dupnames +First code unit = 'a' +Last code unit = 'z' +Subject length lower bound = 5 + +/a*[bcd]/B +------------------------------------------------------------------ + Bra + a*+ + [b-d] + Ket + End +------------------------------------------------------------------ + +/[bcd]*a/B +------------------------------------------------------------------ + Bra + [b-d]*+ + a + Ket + End +------------------------------------------------------------------ + +# A complete set of tests for auto-possessification of character types, but +# omitting \C because it might be disabled (it has its own tests). + +/\D+\D \D+\d \D+\S \D+\s \D+\W \D+\w \D+. 
\D+\R \D+\H \D+\h \D+\V \D+\v \D+\Z \D+\z \D+$/Bx +------------------------------------------------------------------ + Bra + \D+ + \D + \D++ + \d + \D+ + \S + \D+ + \s + \D+ + \W + \D+ + \w + \D+ + Any + \D+ + \R + \D+ + \H + \D+ + \h + \D+ + \V + \D+ + \v + \D+ + \Z + \D++ + \z + \D+ + $ + Ket + End +------------------------------------------------------------------ + +/\d+\D \d+\d \d+\S \d+\s \d+\W \d+\w \d+. \d+\R \d+\H \d+\h \d+\V \d+\v \d+\Z \d+\z \d+$/Bx +------------------------------------------------------------------ + Bra + \d++ + \D + \d+ + \d + \d+ + \S + \d++ + \s + \d++ + \W + \d+ + \w + \d+ + Any + \d++ + \R + \d+ + \H + \d++ + \h + \d+ + \V + \d++ + \v + \d++ + \Z + \d++ + \z + \d++ + $ + Ket + End +------------------------------------------------------------------ + +/\S+\D \S+\d \S+\S \S+\s \S+\W \S+\w \S+. \S+\R \S+\H \S+\h \S+\V \S+\v \S+\Z \S+\z \S+$/Bx +------------------------------------------------------------------ + Bra + \S+ + \D + \S+ + \d + \S+ + \S + \S++ + \s + \S+ + \W + \S+ + \w + \S+ + Any + \S++ + \R + \S+ + \H + \S++ + \h + \S+ + \V + \S++ + \v + \S++ + \Z + \S++ + \z + \S++ + $ + Ket + End +------------------------------------------------------------------ + +/\s+\D \s+\d \s+\S \s+\s \s+\W \s+\w \s+. \s+\R \s+\H \s+\h \s+\V \s+\v \s+\Z \s+\z \s+$/Bx +------------------------------------------------------------------ + Bra + \s+ + \D + \s++ + \d + \s++ + \S + \s+ + \s + \s+ + \W + \s++ + \w + \s+ + Any + \s+ + \R + \s+ + \H + \s+ + \h + \s+ + \V + \s+ + \v + \s+ + \Z + \s++ + \z + \s+ + $ + Ket + End +------------------------------------------------------------------ + +/\W+\D \W+\d \W+\S \W+\s \W+\W \W+\w \W+. 
\W+\R \W+\H \W+\h \W+\V \W+\v \W+\Z \W+\z \W+$/Bx +------------------------------------------------------------------ + Bra + \W+ + \D + \W++ + \d + \W+ + \S + \W+ + \s + \W+ + \W + \W++ + \w + \W+ + Any + \W+ + \R + \W+ + \H + \W+ + \h + \W+ + \V + \W+ + \v + \W+ + \Z + \W++ + \z + \W+ + $ + Ket + End +------------------------------------------------------------------ + +/\w+\D \w+\d \w+\S \w+\s \w+\W \w+\w \w+. \w+\R \w+\H \w+\h \w+\V \w+\v \w+\Z \w+\z \w+$/Bx +------------------------------------------------------------------ + Bra + \w+ + \D + \w+ + \d + \w+ + \S + \w++ + \s + \w++ + \W + \w+ + \w + \w+ + Any + \w++ + \R + \w+ + \H + \w++ + \h + \w+ + \V + \w++ + \v + \w++ + \Z + \w++ + \z + \w++ + $ + Ket + End +------------------------------------------------------------------ + +/\R+\D \R+\d \R+\S \R+\s \R+\W \R+\w \R+. \R+\R \R+\H \R+\h \R+\V \R+\v \R+\Z \R+\z \R+$/Bx +------------------------------------------------------------------ + Bra + \R+ + \D + \R++ + \d + \R+ + \S + \R++ + \s + \R+ + \W + \R++ + \w + \R++ + Any + \R+ + \R + \R+ + \H + \R++ + \h + \R+ + \V + \R+ + \v + \R+ + \Z + \R++ + \z + \R+ + $ + Ket + End +------------------------------------------------------------------ + +/\H+\D \H+\d \H+\S \H+\s \H+\W \H+\w \H+. \H+\R \H+\H \H+\h \H+\V \H+\v \H+\Z \H+\z \H+$/Bx +------------------------------------------------------------------ + Bra + \H+ + \D + \H+ + \d + \H+ + \S + \H+ + \s + \H+ + \W + \H+ + \w + \H+ + Any + \H+ + \R + \H+ + \H + \H++ + \h + \H+ + \V + \H+ + \v + \H+ + \Z + \H++ + \z + \H+ + $ + Ket + End +------------------------------------------------------------------ + +/\h+\D \h+\d \h+\S \h+\s \h+\W \h+\w \h+. 
\h+\R \h+\H \h+\h \h+\V \h+\v \h+\Z \h+\z \h+$/Bx +------------------------------------------------------------------ + Bra + \h+ + \D + \h++ + \d + \h++ + \S + \h+ + \s + \h+ + \W + \h++ + \w + \h+ + Any + \h++ + \R + \h++ + \H + \h+ + \h + \h+ + \V + \h++ + \v + \h+ + \Z + \h++ + \z + \h+ + $ + Ket + End +------------------------------------------------------------------ + +/\V+\D \V+\d \V+\S \V+\s \V+\W \V+\w \V+. \V+\R \V+\H \V+\h \V+\V \V+\v \V+\Z \V+\z \V+$/Bx +------------------------------------------------------------------ + Bra + \V+ + \D + \V+ + \d + \V+ + \S + \V+ + \s + \V+ + \W + \V+ + \w + \V+ + Any + \V++ + \R + \V+ + \H + \V+ + \h + \V+ + \V + \V++ + \v + \V+ + \Z + \V++ + \z + \V+ + $ + Ket + End +------------------------------------------------------------------ + +/\v+\D \v+\d \v+\S \v+\s \v+\W \v+\w \v+. \v+\R \v+\H \v+\h \v+\V \v+\v \v+\Z \v+\z \v+$/Bx +------------------------------------------------------------------ + Bra + \v+ + \D + \v++ + \d + \v++ + \S + \v+ + \s + \v+ + \W + \v++ + \w + \v+ + Any + \v+ + \R + \v+ + \H + \v++ + \h + \v++ + \V + \v+ + \v + \v+ + \Z + \v++ + \z + \v+ + $ + Ket + End +------------------------------------------------------------------ + +/ a+\D a+\d a+\S a+\s a+\W a+\w a+. a+\R a+\H a+\h a+\V a+\v a+\Z a+\z a+$/Bx +------------------------------------------------------------------ + Bra + a+ + \D + a++ + \d + a+ + \S + a++ + \s + a++ + \W + a+ + \w + a+ + Any + a++ + \R + a+ + \H + a++ + \h + a+ + \V + a++ + \v + a++ + \Z + a++ + \z + a++ + $ + Ket + End +------------------------------------------------------------------ + +/\n+\D \n+\d \n+\S \n+\s \n+\W \n+\w \n+. 
\n+\R \n+\H \n+\h \n+\V \n+\v \n+\Z \n+\z \n+$/Bx +------------------------------------------------------------------ + Bra + \x0a+ + \D + \x0a++ + \d + \x0a++ + \S + \x0a+ + \s + \x0a+ + \W + \x0a++ + \w + \x0a+ + Any + \x0a+ + \R + \x0a+ + \H + \x0a++ + \h + \x0a++ + \V + \x0a+ + \v + \x0a+ + \Z + \x0a++ + \z + \x0a+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bx +------------------------------------------------------------------ + Bra + Any+ + \D + Any+ + \d + Any+ + \S + Any+ + \s + Any+ + \W + Any+ + \w + Any+ + Any + Any++ + \R + Any+ + \H + Any+ + \h + Any+ + \V + Any+ + \v + Any+ + \Z + Any++ + \z + Any+ + $ + Ket + End +------------------------------------------------------------------ + +/ .+\D .+\d .+\S .+\s .+\W .+\w .+. .+\R .+\H .+\h .+\V .+\v .+\Z .+\z .+$/Bsx +------------------------------------------------------------------ + Bra + AllAny+ + \D + AllAny+ + \d + AllAny+ + \S + AllAny+ + \s + AllAny+ + \W + AllAny+ + \w + AllAny+ + AllAny + AllAny+ + \R + AllAny+ + \H + AllAny+ + \h + AllAny+ + \V + AllAny+ + \v + AllAny+ + \Z + AllAny++ + \z + AllAny+ + $ + Ket + End +------------------------------------------------------------------ + +/ \D+$ \d+$ \S+$ \s+$ \W+$ \w+$ \R+$ \H+$ \h+$ \V+$ \v+$ a+$ \n+$ .+$ .+$/Bmx +------------------------------------------------------------------ + Bra + \D+ + /m $ + \d++ + /m $ + \S++ + /m $ + \s+ + /m $ + \W+ + /m $ + \w++ + /m $ + \R+ + /m $ + \H+ + /m $ + \h+ + /m $ + \V+ + /m $ + \v+ + /m $ + a+ + /m $ + \x0a+ + /m $ + Any+ + /m $ + Any+ + /m $ + Ket + End +------------------------------------------------------------------ + +/(?=a+)a(a+)++a/B +------------------------------------------------------------------ + Bra + Assert + a++ + Ket + a + CBraPos 1 + a+ + KetRpos + a + Ket + End +------------------------------------------------------------------ + 
+/a+(bb|cc)a+(?:bb|cc)a+(?>bb|cc)a+(?:bb|cc)+a+(aa)a+(?:bb|aa)/B +------------------------------------------------------------------ + Bra + a++ + CBra 1 + bb + Alt + cc + Ket + a++ + Bra + bb + Alt + cc + Ket + a++ + Once + bb + Alt + cc + Ket + a++ + Bra + bb + Alt + cc + KetRmax + a+ + CBra 2 + aa + Ket + a+ + Bra + bb + Alt + aa + Ket + Ket + End +------------------------------------------------------------------ + +/a+(bb|cc)?#a+(?:bb|cc)??#a+(?:bb|cc)?+#a+(?:bb|cc)*#a+(bb|cc)?a#a+(?:aa)?/B +------------------------------------------------------------------ + Bra + a++ + Brazero + CBra 1 + bb + Alt + cc + Ket + # + a++ + Braminzero + Bra + bb + Alt + cc + Ket + # + a++ + Once + Brazero + Bra + bb + Alt + cc + Ket + Ket + # + a++ + Brazero + Bra + bb + Alt + cc + KetRmax + # + a+ + Brazero + CBra 2 + bb + Alt + cc + Ket + a# + a+ + Brazero + Bra + aa + Ket + Ket + End +------------------------------------------------------------------ + +/a+(?:bb)?a#a+(?:|||)#a+(?:|b)a#a+(?:|||)?a/B +------------------------------------------------------------------ + Bra + a+ + Brazero + Bra + bb + Ket + a# + a++ + Bra + Alt + Alt + Alt + Ket + # + a+ + Bra + Alt + b + Ket + a# + a+ + Brazero + Bra + Alt + Alt + Alt + Ket + a + Ket + End +------------------------------------------------------------------ + +/[ab]*/B +------------------------------------------------------------------ + Bra + [ab]*+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]*?/B +------------------------------------------------------------------ + Bra + [ab]*? + Ket + End +------------------------------------------------------------------ + aaaa + 0: + +/[ab]?/B +------------------------------------------------------------------ + Bra + [ab]?+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: a + +/[ab]??/B +------------------------------------------------------------------ + Bra + [ab]?? 
+ Ket + End +------------------------------------------------------------------ + aaaa + 0: + +/[ab]+/B +------------------------------------------------------------------ + Bra + [ab]++ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]+?/B +------------------------------------------------------------------ + Bra + [ab]+? + Ket + End +------------------------------------------------------------------ + aaaa + 0: a + +/[ab]{2,3}/B +------------------------------------------------------------------ + Bra + [ab]{2,3}+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaa + +/[ab]{2,3}?/B +------------------------------------------------------------------ + Bra + [ab]{2,3}? + Ket + End +------------------------------------------------------------------ + aaaa + 0: aa + +/[ab]{2,}/B +------------------------------------------------------------------ + Bra + [ab]{2,}+ + Ket + End +------------------------------------------------------------------ + aaaa + 0: aaaa + +/[ab]{2,}?/B +------------------------------------------------------------------ + Bra + [ab]{2,}? + Ket + End +------------------------------------------------------------------ + aaaa + 0: aa + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B +------------------------------------------------------------------ + Bra + \d++ + \s{0,5}+ + = + \s*+ + \S? 
+ = + \w{0,4}+ + \W*+ + Ket + End +------------------------------------------------------------------ + +/[a-d]{5,12}[e-z0-9]*#[^a-z]+[b-y]*a[2-7]?[^0-9a-z]+/B +------------------------------------------------------------------ + Bra + [a-d]{5,12}+ + [0-9e-z]*+ + # + [^a-z]++ + [b-y]*+ + a + [2-7]?+ + [^0-9a-z]++ + Ket + End +------------------------------------------------------------------ + +/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/B +------------------------------------------------------------------ + Bra + [a-z]*+ + \s + # + [\x09 ]?+ + \S + # + [a-c]* + \S + # + [C-G]++ + \d + # + [4-8]*+ + \D + # + [,4-9]* + \D + # + [!$]{0,5}+ + \w + # + [M-Xf-l]++ + \W + # + [,a-c]? + \W + Ket + End +------------------------------------------------------------------ + +/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/B +------------------------------------------------------------------ + Bra + a+ + Brazero + CBra 1 + aa + Alt + bb + KetRmax + c# + a* + Brazero + CBra 2 + bb + Alt + cc + KetRmax + a# + a?+ + Brazero + CBra 3 + bb + Alt + cc + KetRmax + d# + [a-f]* + Brazero + CBra 4 + g + Alt + hh + KetRmax + f + Ket + End +------------------------------------------------------------------ + +/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/B +------------------------------------------------------------------ + Bra + [a-f]*+ + Brazero + CBra 1 + g + Alt + hh + Alt + i + KetRmax + i# + [a-x]{4,} + Brazero + SCBra 2 + y{0,6} + KetRmax + y# + [a-k]++ + CBra 3 + ll + Alt + mm + KetRmax + n + Ket + End +------------------------------------------------------------------ + +/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/B +------------------------------------------------------------------ + Bra + [a-f]*+ + Once + gg + Alt + hh + KetRmax + # + [a-f]*+ + Brazero + Once + gg + Alt + hh + Ket + # + [a-f]* + Brazero + Once + gg + Alt + hh + KetRmax + a# + [a-f]*+ + Brazero + Once + gg + Alt + hh + KetRmax 
+ h + Ket + End +------------------------------------------------------------------ + +/[a-c]*d/IB +------------------------------------------------------------------ + Bra + [a-c]*+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +/[a-c]+d/IB +------------------------------------------------------------------ + Bra + [a-c]++ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c +Last code unit = 'd' +Subject length lower bound = 2 + +/[a-c]?d/IB +------------------------------------------------------------------ + Bra + [a-c]?+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +/[a-c]{4,6}d/IB +------------------------------------------------------------------ + Bra + [a-c]{4,6}+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c +Last code unit = 'd' +Subject length lower bound = 5 + +/[a-c]{0,6}d/IB +------------------------------------------------------------------ + Bra + [a-c]{0,6}+ + d + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Starting code units: a b c d +Last code unit = 'd' +Subject length lower bound = 1 + +# End of special auto-possessive tests + +/^A\o{1239}B/ +Failed: error 164 at offset 8: non-octal character in \o{} (closing brace missing?) + A\123B + +/^A\oB/ +Failed: error 155 at offset 4: missing opening brace after \o + +/^A\x{zz}B/ +Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) + +/^A\x{12Z/ +Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) 
+ +/^A\x{/ +Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} + +/[ab]++/B,no_auto_possess +------------------------------------------------------------------ + Bra + [ab]++ + Ket + End +------------------------------------------------------------------ + +/[^ab]*+/B,no_auto_possess +------------------------------------------------------------------ + Bra + [^ab]*+ + Ket + End +------------------------------------------------------------------ + +/a{4}+/B,no_auto_possess +------------------------------------------------------------------ + Bra + a{4} + Ket + End +------------------------------------------------------------------ + +/a{4}+/Bi,no_auto_possess +------------------------------------------------------------------ + Bra + /i a{4} + Ket + End +------------------------------------------------------------------ + +/[a-[:digit:]]+/ +Failed: error 150 at offset 12: invalid range in character class + +/[A-[:digit:]]+/ +Failed: error 150 at offset 12: invalid range in character class + +/[a-[.xxx.]]+/ +Failed: error 150 at offset 10: invalid range in character class + +/[a-[=xxx=]]+/ +Failed: error 150 at offset 10: invalid range in character class + +/[a-[!xxx!]]+/ +Failed: error 108 at offset 3: range out of order in character class + +/[A-[!xxx!]]+/ + A]]] + 0: A]]] + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + +/(?<0abc>xx)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?&1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?xx)/ +Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) 
+ +/(?'0abc'xx)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P<0abc>xx)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\k<5ghj>/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/\k'5ghj'/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/\k{2fgh}/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P=8yuki)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\g{4df}/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/(?&1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 3: subpattern name must start with a non-digit + +/(?P>1abc)xx(?<1abc>y)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/\g'3gh'/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/\g<5fg>/ +Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/(?(<4gh>)abc)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/(?('4gh')abc)/ +Failed: error 144 at offset 4: subpattern name must start with a non-digit + +/(?(4gh)abc)/ +Failed: error 124 at offset 4: missing closing parenthesis for condition + +/(?(R&6yh)abc)/ +Failed: error 144 at offset 5: subpattern name must start with a non-digit + +/(((a\2)|(a*)\g<-1>))*a?/B +------------------------------------------------------------------ + Bra + Brazero + SCBra 1 + CBra 2 + CBra 3 + a + \2 + Ket + Alt + CBra 4 + a* + Ket + Recurse + Ket + KetRmax + a?+ + Ket + End +------------------------------------------------------------------ + +# Test the ugly "start or end of word" compatibility syntax. 
+ +/[[:<:]]red[[:>:]]/B +------------------------------------------------------------------ + Bra + \b + Assert + \w + Ket + red + \b + Assert back + Reverse + \w + Ket + Ket + End +------------------------------------------------------------------ + little red riding hood + 0: red + a /red/ thing + 0: red + red is a colour + 0: red + put it all on red + 0: red +\= Expect no match + no reduction +No match + Alfred Winifred +No match + +/[[:<:]]+red/B +------------------------------------------------------------------ + Bra + \b + Assert + \w + Ket + Brazero + Assert + \w + Ket + red + Ket + End +------------------------------------------------------------------ + little red riding hood + 0: red + red is a colour + 0: red +\= Expect no match + Alfred +No match + +/[a[:<:]] should give error/ +Failed: error 130 at offset 7: unknown POSIX class name + +/(?=ab\K)/aftertext,allow_lookaround_bsk + abcd\=startchar +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0+ abcd + +/abcd/newline=lf,firstline +\= Expect no match + xx\nxabcd +No match + +# Test stack guard external calls. + +/(((a)))/stackguard=1 +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) + +/(((a)))/stackguard=2 +Failed: error 133 at offset 7: parentheses are too deeply nested (stack check) + +/(((a)))/stackguard=3 + +/(((((a)))))/ + +# End stack guard tests + +/^\w+(?>\s*)(?<=\w)/B +------------------------------------------------------------------ + Bra + ^ + \w+ + Once + \s*+ + Ket + Assert back + Reverse + \w + Ket + Ket + End +------------------------------------------------------------------ + +/\othing/ +Failed: error 155 at offset 2: missing opening brace after \o + +/\o{}/ +Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{U+} + +/\o{whatever}/ +Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) 
+ +/\xthing/ +Failed: error 178 at offset 2: digits missing after \x or in \x{} or \o{} or \N{U+} + +/^A\xZ/ +Failed: error 178 at offset 4: digits missing after \x or in \x{} or \o{} or \N{U+} + +/^A\x/ +Failed: error 178 at offset 4: digits missing after \x or in \x{} or \o{} or \N{U+} + +/\x{}/ +Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{U+} + +/\x{whatever}/ +Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) + +/A\8B/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/A\9B/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +# This one is here because Perl fails to match "12" for this pattern when the $ +# is present. + +/^(?(?=abc)\w{3}:|\d\d)$/ + abc: + 0: abc: + 12 + 0: 12 +\= Expect no match + 123 +No match + xyz +No match + +# Perl gets this one wrong, giving "a" as the after text for ca and failing to +# match for cd. + +/(?(?=ab)ab)/aftertext + abxxx + 0: ab + 0+ xxx + ca + 0: + 0+ ca + cd + 0: + 0+ cd + +# This should test both paths for processing OP_RECURSE. 
+ +/(?(R)a+|(?R)b)/ + aaaabcde + 0: aaaab + aaaabcde\=ovector=100 + 0: aaaab + +/a*?b*?/ + ab + 0: + +/(*NOTEMPTY)a*?b*?/ + ab + 0: a + ba + 0: b + cb + 0: b + +/(*NOTEMPTY_ATSTART)a*?b*?/aftertext + ab + 0: a + 0+ b + cdab + 0: + 0+ dab + +/(?(VERSION>=10.0)yes|no)/I +Capture group count = 0 +Subject length lower bound = 2 + yesno + 0: yes + +/(?(VERSION>=10.04)yes|no)/ + yesno + 0: yes + +/(?(VERSION=8)yes){3}/BI,aftertext +------------------------------------------------------------------ + Bra + Cond + Cond false + yes + Ket + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + yesno + 0: + 0+ yesno + +/(?(VERSION=8)yes|no){3}/I +Capture group count = 0 +Subject length lower bound = 6 + yesnononoyes + 0: nonono +\= Expect no match + yesno +No match + +/(?:(?abc)|xyz)(?(VERSION)yes|no)/I +Capture group count = 1 +Max back reference = 1 +Named capture groups: + VERSION 1 +Starting code units: a x +Subject length lower bound = 5 + abcyes + 0: abcyes + 1: abc + xyzno + 0: xyzno +\= Expect no match + abcno +No match + xyzyes +No match + +/(?(VERSION<10)yes|no)/ +Failed: error 179 at offset 10: syntax error or number too big in (?(VERSION condition + +/(?(VERSION>10)yes|no)/ +Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION condition + +/(?(VERSION>=10.0.0)yes|no)/ +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition + +/(?(VERSION=10.101)yes|no)/ +Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition + +# We should see the starting code unit, required code unit, and minimum length set for this regex: +/abcd/I +Capture group count = 0 +First code unit = 'a' +Last code unit = 'd' +Subject length lower bound = 4 + +# None of the following three should have the starting code unit, required code unit, and minimum length set: +/abcd/I,no_start_optimize +Capture group 
count = 0 +Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor + +/abcd/I,start_optimize_off +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/abcd/I,optimization_none +Capture group count = 0 +Optimizations: + +/(|ab)*?d/I +Capture group count = 1 +Starting code units: a d +Last code unit = 'd' +Subject length lower bound = 1 + abd + 0: abd + 1: ab + xyd + 0: d + +/(|ab)*?d/I,no_start_optimize +Capture group count = 1 +Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor + abd + 0: abd + 1: ab + xyd + 0: d + +/\k*(?aa)(?bb)/match_unset_backref,dupnames + aabb + 0: aabb + 1: aa + 2: bb + +/(((((a)))))/parens_nest_limit=2 +Failed: error 119 at offset 3: parentheses are too deeply nested + +/abc/replace=XYZ + 123123 + 0: 123123 + 123abc123 + 1: 123XYZ123 + 123abc123abc123 + 1: 123XYZ123abc123 + 123123\=zero_terminate + 0: 123123 + 123abc123\=zero_terminate + 1: 123XYZ123 + 123abc123abc123\=zero_terminate + 1: 123XYZ123abc123 + +/abc/g,replace=XYZ + 123abc123 + 1: 123XYZ123 + 123abc123abc123 + 2: 123XYZ123XYZ123 + +/abc/replace=X$$Z + 123abc123 + 1: 123X$Z123 + +/abc/g,replace=X$$Z + 123abc123abc123 + 2: 123X$Z123X$Z123 + +/a(b)c(d)e/replace=X$1Y${2}Z + "abcde" + 1: "XbYdZ" + +/a(b)c(d)e/replace=X$1Y${2}Z,global + "abcde-abcde" + 2: "XbYdZ-XbYdZ" + +/a(?b)c(?d)e/replace=X$ONE+${TWO}Z + "abcde" + 1: "Xb+dZ" + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z + "abcde-abcde-" + 2: "Xb+dZ-Xb+dZ-" + +/abc/replace=a$++ + 123abc +Failed: error -35 at offset 2 in replacement: invalid replacement string + +/abc/replace=a$bad + 123abc +Failed: error -49 at offset 5 in replacement: unknown substring + +/abc/replace=a${A234567890123456789_123456789012}z + 123abc +Failed: error -49 at offset 36 in replacement: unknown substring + +/abc/replace=a${A23456789012345678901234567890123}z + 123abc +Failed: error -49 at offset 37 in replacement: unknown substring + +/abc/replace=a${bcd + 123abc +Failed: error -58 at offset 6 in replacement: 
expected closing curly bracket in replacement string + +/abc/replace=a${b+d}z + 123abc +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + +/abc/replace=[10]XYZ + 123abc123 + 1: 123XYZ123 + +/abc/replace=[9]XYZ + 123abc123 +Failed: error -48: no more memory + +/abc/replace=xyz + 1abc2\=partial_hard +Failed: error -34: bad option value + +/abc/replace=xyz + 123abc456 + 1: 123xyz456 + 123abc456\=replace=pqr + 1: 123pqr456 + 123abc456abc789 + 1: 123xyz456abc789 + 123abc456abc789\=g + 2: 123xyz456xyz789 + +/(?<=abc)(|def)/g,replace=<$0> + 123abcxyzabcdef789abcpqr + 4: 123abc<>xyzabc<>789abc<>pqr + +/./replace=$0 + a + 1: a + +/(.)(.)/replace=$2+$1 + abc + 1: b+ac + +/(?.)(?.)/replace=$B+$A + abc + 1: b+ac + +/(.)(.)/g,replace=$2$1 + abcdefgh + 4: badcfehg + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + 3: pear orange strawberry + apple strudel + 1: pear strudel + fruitless + 0: fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} sauce, + apple lemon blackberry + 1: pear sauce lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + 3: + apple strudel + 1: strudel + fruitless + 0: fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry +Failed: error -35 at offset 11 in replacement: invalid replacement string + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry +Failed: error -35 at offset 8 in replacement: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry +Failed: error -35 at offset 9 in replacement: invalid replacement string + 
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry +Failed: error -48: no more memory + apple lemon blackberry\=substitute_overflow_length +Failed: error -48: no more memory: 23 code units are needed + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + 3: pear orange strawberry + +/"(*:fruit" 00 "juice)apple"/hex,g,replace=${*MARK} + apple lemon blackberry + 1: fruit\x00juice lemon blackberry + +/abc/ + 123abc123\=replace=XYZ + 1: 123XYZ123 + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + +/abc/ + 123abc123\=replace=XY + 1: 123XY123 + 123abc123\=replace=[9]XY + 1: 123XY123 + 123abc123\=replace=[9]XY,substitute_literal + 1: 123XY123 +\= Expect error + 123abc123\=replace=[8]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[8]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[6]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[5]XY,substitute_overflow_length,substitute_literal +Failed: 
error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[4]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[3]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[2]XY,substitute_overflow_length +Failed: error -48: no more memory: 9 code units are needed + 123abc123\=replace=[2]XY,substitute_overflow_length,substitute_literal +Failed: error -48: no more memory: 9 code units are needed + +/abc/substitute_literal + 123abc123\=replace=XYZ + 1: 123XYZ123 + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 +\= Expect error + 123abc123\=replace=[9]XYZ +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]XYZ +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]XYZ +Failed: error -48: no more memory: 10 code units are needed + +/a(b)c/ + 123abc123\=replace=[9]x$1z +Failed: error -48: no more memory + 123abc123\=substitute_overflow_length,replace=[9]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[6]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[1]x$1z +Failed: error -48: no more memory: 10 code units are needed + 123abc123\=substitute_overflow_length,replace=[0]x$1z +Failed: error -48: no more memory: 10 code units are needed + 
+/a(b)c/substitute_extended + ZabcZ\=replace=>\1< + 1: Z>b\2< +Failed: error -49 at offset 3 in replacement: unknown substring + ZabcZ\=replace=>\8< +Failed: error -49 at offset 3 in replacement: unknown substring + ZabcZ\=replace=>${1}< + 1: Z>b${ 1 }< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${2}< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>${8}< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$<1>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$< 1 >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$<2>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>$<8>< +Failed: error -49 at offset 5 in replacement: unknown substring + ZabcZ\=replace=>\g<-1>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<0>< + 1: Z>abc\g<1>< + 1: Z>b\g< 1 >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<2>< +Failed: error -49 at offset 6 in replacement: unknown substring + ZabcZ\=replace=>\g<8>< +Failed: error -49 at offset 6 in replacement: unknown substring + +/(*:pear)apple/substitute_extended + ZappleZ\=replace=>${*MARK}< + 1: Z>pear$<*MARK>< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZappleZ\=replace=>\g<*MARK>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(?b)c/substitute_extended + ZabcZ\=replace=>${named}< + 1: Z>b${noexist}< +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>${}< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${ }< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${ named }< +Failed: error -35 at offset 3 
in replacement: invalid replacement string + ZabcZ\=replace=>$< + 1: Z>b$< +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>$<>< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$< >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$< named >< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>\g< + 1: Z>b\g< +Failed: error -49 at offset 12 in replacement: unknown substring + ZabcZ\=replace=>\g<>< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g< >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g< named >< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(b)c/substitute_extended + ZabcZ\=replace=>${1:+ yes : no } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ \o{100} : \o{100} } + 1: Z> @ Z + ZabcZ\=replace=>${1:+ \o{Z} : no } +Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ yes : \o{Z} } +Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ \g<1> : no } + 1: Z> b Z + ZabcZ\=replace=>${1:+ yes : \g<1> } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ \g<1 : no } +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ yes : \g<1 } +Failed: error -57 at offset 14 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>${1:+ $<1> : no } +Failed: error -49 at offset 11 in replacement: unknown substring + ZabcZ\=replace=>${1:+ yes : $<1> } + 1: Z> yes Z + ZabcZ\=replace=>${1:+ $<1 : no } +Failed: error -35 at offset 10 in replacement: invalid replacement string + ZabcZ\=replace=>${1:+ yes : $<1 } + 1: Z> yes Z + +/a(b)c/substitute_extended + ZabcZ\=replace=>${ 
+Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>${1 +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>${1Z +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>${1; +Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string + ZabcZ\=replace=>$< +Failed: error -35 at offset 3 in replacement: invalid replacement string + ZabcZ\=replace=>$<1 +Failed: error -35 at offset 4 in replacement: invalid replacement string + ZabcZ\=replace=>$<1Z +Failed: error -35 at offset 5 in replacement: invalid replacement string + ZabcZ\=replace=>$<1; +Failed: error -35 at offset 4 in replacement: invalid replacement string + ZabcZ\=replace=>\g< +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1 +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1Z +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + ZabcZ\=replace=>\g<1; +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + +"((?=(?(?=(?(?=(?(?=()))))))))" + a + 0: + 1: + 2: + +"(?(?=)==)(((((((((?=)))))))))" +\= Expect no match + a +No match + +/(a)(b)|(c)/ + XcX\=ovector=2,get=1,get=2,get=3,get=4,getall +Matched, but too many substrings + 0: c + 1: +Get substring 1 failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available +Get substring 3 failed (-54): requested value is not available +Get substring 4 failed (-49): unknown substring + 0L c + 1L + +/x(?=ab\K)/allow_lookaround_bsk + xab\=get=0 +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0G (0) + xab\=copy=0 +Start of matched string is beyond its end - displaying from end to start. 
+ 0: ab + 0C (0) + xab\=getall +Start of matched string is beyond its end - displaying from end to start. + 0: ab + 0L + +/(?a)|(?b)/dupnames + a\=ovector=1,copy=A,get=A,get=2 +Matched, but too many substrings + 0: a +Copy substring "A" failed (-54): requested value is not available +Get substring 2 failed (-54): requested value is not available +Get substring "A" failed (-54): requested value is not available + a\=ovector=2,copy=A,get=A,get=2 + 0: a + 1: a + C a (1) A (non-unique) +Get substring 2 failed (-54): requested value is not available + G a (1) A (non-unique) + b\=ovector=2,copy=A,get=A,get=2 +Matched, but too many substrings + 0: b + 1: +Copy substring "A" failed (-55): requested value is not set +Get substring 2 failed (-54): requested value is not available +Get substring "A" failed (-55): requested value is not set + +/a(b)c(d)/ + abc\=ph,copy=0,copy=1,getall +Partial match: abc + 0C abc (3) +Copy substring 1 failed (-2): partial match +get substring list failed (-2): partial match + +/^abc/info +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/^abc/info,no_dotstar_anchor +Capture group count = 0 +Compile options: no_dotstar_anchor +Overall options: anchored no_dotstar_anchor +Optimizations: auto_possess,start_optimize +First code unit = 'a' +Subject length lower bound = 3 + +/^abc/info,dotstar_anchor_off +Capture group count = 0 +Compile options: +Overall options: anchored +Optimizations: auto_possess,start_optimize +First code unit = 'a' +Subject length lower bound = 3 + +# For comparison with the following tests, which disable automatic dotstar anchoring +/.*abc/BI +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit at start or follows newline +Last code unit = 'c' +Subject length lower bound = 3 + 
+/.*abc/BI,dotstar_anchor_off +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: auto_possess,start_optimize +Last code unit = 'c' +Subject length lower bound = 3 + +/.*abc/BI,start_optimize_off +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/.*abc/BI,optimization_none +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Optimizations: + +/.*abc/BI,no_dotstar_anchor +------------------------------------------------------------------ + Bra + Any* + abc + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize +Last code unit = 'c' +Subject length lower bound = 3 + +/.*\d/info,auto_callout +Capture group count = 0 +Options: auto_callout +First code unit at start or follows newline +Subject length lower bound = 1 +\= Expect no match + aaa +--->aaa + +0 ^ .* + +2 ^ ^ \d + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d +No match + +/.*\d/info,no_dotstar_anchor,auto_callout +Capture group count = 0 +Options: auto_callout no_dotstar_anchor +Optimizations: auto_possess,start_optimize +Subject length lower bound = 1 +\= Expect no match + aaa +--->aaa + +0 ^ .* + +2 ^ ^ \d + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^^ \d + +2 ^ \d +No match + +/.*\d/dotall,info +Capture group count = 0 +Compile options: dotall +Overall options: anchored dotall +Subject length lower bound = 1 + +/.*\d/dotall,no_dotstar_anchor,info +Capture group count = 0 +Options: 
dotall no_dotstar_anchor +Optimizations: auto_possess,start_optimize +Subject length lower bound = 1 + +/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info +Capture group count = 0 +Compile options: +Overall options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize +Subject length lower bound = 1 + +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?:(?a)|b)(?()A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?)(?:(?a)|b)(?()A|B)'dupnames + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 +Matched, but too many substrings + 0: aA + 1: + aA123\=ovector=3 + 0: aA + 1: + 2: a + +'^(?:(?X)|)(?:(?a)|b)\k{AA}'dupnames + aa123\=ovector=1 +Matched, but too many substrings + 0: aa + aa123\=ovector=2 +Matched, but too many substrings + 0: aa + 1: + aa123\=ovector=3 + 0: aa + 1: + 2: a + +/(?(?J)(?1(111111)11|)1|1|)(?()1)/ + +/(?(?J)(?))(?-J)\k/ + +# Quantifiers are not allowed on condition assertions, but are otherwise +# OK in conditions. + +/(?(?=0)?)+/ +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + +/(?(?=0)(?=00)?00765)/ + 00765 + 0: 00765 + +/(?(?=0)(?=00)?00765|(?!3).56)/ + 00765 + 0: 00765 + 456 + 0: 456 +\= Expect no match + 356 +No match + +'^(a)*+(\w)' + g + 0: g + 1: + 2: g + g\=ovector=1 +Matched, but too many substrings + 0: g + +'^(?:a)*+(\w)' + g + 0: g + 1: g + g\=ovector=1 +Matched, but too many substrings + 0: g + +# These two pattern showeds up compile-time bugs + +"((?2){0,1999}())?" 
+ +/((?+1)(\1))/B +------------------------------------------------------------------ + Bra + CBra 1 + Recurse + CBra 2 + \1 + Ket + Ket + Ket + End +------------------------------------------------------------------ + +# Callouts with string arguments + +/a(?C"/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a/ +Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument + +/a(?C"a"/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a"bcde(?C"b")xyz/ +Failed: error 139 at offset 7: closing parenthesis for (?C expected + +/a(?C"a)b""c")/B +------------------------------------------------------------------ + Bra + a + CalloutStr "a)b"c" 5 13 0 + Ket + End +------------------------------------------------------------------ + +/ab(?C" any text with spaces ")cde/B +------------------------------------------------------------------ + Bra + ab + CalloutStr " any text with spaces " 6 30 1 + cde + Ket + End +------------------------------------------------------------------ + abcde +Callout (6): " any text with spaces " +--->abcde + ^ ^ c + 0: abcde + 12abcde +Callout (6): " any text with spaces " +--->12abcde + ^ ^ c + 0: abcde + +/^a(b)c(?C1)def/ + abcdef +--->abcdef + 1 ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C"AB")def/ + abcdef +Callout (10): "AB" +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C1)def/ + abcdef\=callout_capture +Callout 1: last capture = 1 + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/^a(b)c(?C{AB})def/B +------------------------------------------------------------------ + Bra + ^ + a + CBra 1 + b + Ket + c + CalloutStr {AB} 10 14 1 + def + Ket + End +------------------------------------------------------------------ + abcdef\=callout_capture +Callout (10): {AB} last capture = 1 + 1: b +--->abcdef + ^ ^ d + 0: abcdef + 1: b + +/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info 
+------------------------------------------------------------------ + Bra + CalloutStr `a`b` 4 10 0 + CalloutStr 'a'b' 14 20 0 + CalloutStr "a"b" 24 30 0 + CalloutStr ^a^b^ 34 40 0 + CalloutStr %a%b% 44 50 0 + CalloutStr #a#b# 54 60 0 + CalloutStr $a$b$ 64 70 0 + CalloutStr {a}b} 74 80 0 + Ket + End +------------------------------------------------------------------ +Callout `a`b` ( +Callout 'a'b' ( +Callout "a"b" ( +Callout ^a^b^ ( +Callout %a%b% ( +Callout #a#b# ( +Callout $a$b$ ( +Callout {a}b} + +/(?:a(?C`code`)){3}/B +------------------------------------------------------------------ + Bra + Bra + a + CalloutStr `code` 8 14 4 + Ket + Bra + a + CalloutStr `code` 8 14 4 + Ket + Bra + a + CalloutStr `code` 8 14 4 + Ket + Ket + End +------------------------------------------------------------------ + +/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info +------------------------------------------------------------------ + Bra + ^ + Cond + Callout 25 9 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ +Callout 25 (?= + abcdefg +--->abcdefg + 25 ^ (?= + 0: abcd + xyz123 +--->xyz123 + 25 ^ (?= + 0: xyz + +/^(?(?C$abc$)(?=abc)abcd|xyz)/B +------------------------------------------------------------------ + Bra + ^ + Cond + CalloutStr $abc$ 7 12 3 + Assert + abc + Ket + abcd + Alt + xyz + Ket + Ket + End +------------------------------------------------------------------ + abcdefg +Callout (7): $abc$ +--->abcdefg + ^ (?= + 0: abcd + xyz123 +Callout (7): $abc$ +--->xyz123 + ^ (?= + 0: xyz + +/^ab(?C'first')cd(?C"second")ef/ + abcdefg +Callout (7): 'first' +--->abcdefg + ^ ^ c +Callout (20): "second" +--->abcdefg + ^ ^ e + 0: abcdef + +/(?:a(?C`code`)){3}X/ + aaaXY +Callout (8): `code` +--->aaaXY + ^^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} +Callout (8): `code` +--->aaaXY + ^ ^ ){3} + 0: aaaX + +# Binary zero in callout string +# a ( ? 
C ' x z ' ) b +/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info +Callout 'x\x00z' b + abcdefgh +Callout (5): 'x\x00z' +--->abcdefgh + ^^ b + 0: ab + +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb + 0: b +\= Expect no match + aaa +No match + +# JIT gives a different error message for the infinite recursion + +"(*NO_JIT)((?2)+)((?1)){" + abcd{ +Failed: error -52: nested recursion at the same subject position + +# Perl fails to diagnose the absence of an assertion + +"(?(?.*!.*)?)" +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) + +"X((?2)()*+){2}+"B +------------------------------------------------------------------ + Bra + X + Once + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + Ket + End +------------------------------------------------------------------ + +"X((?2)()*+){2}"B +------------------------------------------------------------------ + Bra + X + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + End +------------------------------------------------------------------ + +/(?<=\bABQ(3(?-7)))/ +Failed: error 115 at offset 15: reference to non-existent subpattern + +/(?<=\bABQ(3(?+7)))/ +Failed: error 115 at offset 15: reference to non-existent subpattern + +";(?<=()((?3))((?2)))" +Failed: error 125 at offset 1: length of lookbehind assertion is not limited + +# Perl loops on this (PCRE2 used to!) 
+ +/(?<=\Ka)/g,aftertext,allow_lookaround_bsk + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + +/(?<=\Ka)/altglobal,aftertext,allow_lookaround_bsk + aaaaa + 0: a + 0+ aaaa + 0: a + 0+ aaa + 0: a + 0+ aa + 0: a + 0+ a + 0: a + 0+ + +/((?2){73}(?2))((?1))/info +Capture group count = 2 +May match empty string +Subject length lower bound = 0 + +/abc/ +\= Expect no match + \[9x!xxx(]{9999} +No match + +/(abc)*/ + \[abc]{5} + 0: abcabcabcabcabc + 1: abc + +/^/gm + \n\n\n + 0: + 0: + 0: + +/^/gm,alt_circumflex + \n\n\n + 0: + 0: + 0: + 0: + +/((((((((x))))))))\81/ +Failed: error 115 at offset 19: reference to non-existent subpattern + xx1 + +/((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ + xx +Matched, but too many substrings + 0: xx + 1: x + 2: x + 3: x + 4: x + 5: x + 6: x + 7: x + 8: x + 9: x +10: x +11: x +12: x +13: x +14: x + +/\80/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/A\8B\9C/ +Failed: error 115 at offset 2: reference to non-existent subpattern + A8B9C + +/(?x:((?'a')) # comment (with parentheses) and | vertical +(?-x:#not a comment (?'b')) # this is a comment () +(?'c')) # not a comment (?'d')/info +Capture group count = 5 +Named capture groups: + a 2 + b 3 + c 4 + d 5 +First code unit = '#' +Last code unit = ' ' +Subject length lower bound = 32 + +/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames +Capture group count = 3 +Named capture groups: + a 1 + a 2 + b 3 +Options: dupnames +Starting code units: 2 3 +Subject length lower bound = 1 + A23B + 0: 2 + 1: + 2: 2 + 3: + B32A + 0: 3 + 1: + 2: + 3: 3 + +# These are some patterns that used to cause buffer overflows or other errors +# while compiling. 
+ +/.((?2)(?R)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + CBra 1 + Recurse + Recurse + Alt + \1 + Alt + $ + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ + +/.((?3)(?R)()(?2)|\1|$)()/B +------------------------------------------------------------------ + Bra + Any + CBra 1 + Recurse + Recurse + CBra 2 + Ket + Recurse + Alt + \1 + Alt + $ + Ket + CBra 3 + Ket + Ket + End +------------------------------------------------------------------ + +/(\9*+(?2);\3++()2|)++{/ +Failed: error 115 at offset 2: reference to non-existent subpattern + +/\V\x85\9*+((?2)\3++()2)*:2/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames + +/(((?(X)){0,2}) (?'x'((?'X')((?'X')))))/dupnames + +/(((?(R)){0,2}) (?'x'((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" +Failed: error 125 at offset 16: length of lookbehind assertion is not limited + +/A(?'')Z/ +Failed: error 162 at offset 4: subpattern name expected + +"(?J:(?|(?'R')(\k'R')|((?'R'))))" + +/(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ +Failed: error 161 at offset 32: subpattern number is too big + +/^(?:(?(1)x|)+)+$()/B +------------------------------------------------------------------ + Bra + ^ + SBra + SCond + 1 Capture ref + x + Alt + KetRmax + KetRmax + $ + CBra 1 + Ket + Ket + End +------------------------------------------------------------------ + +/[[:>:]](?<)/ +Failed: error 162 at offset 10: subpattern name expected + +/((?x)(*:0))#(?'/ +Failed: error 162 at offset 15: subpattern name expected + +/(?C$[$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?C$)$)(?<]/ +Failed: error 162 at offset 10: subpattern name expected + +/(?(R))*+/B +------------------------------------------------------------------ + Bra + Braposzero + SBraPos + SCond + Cond recurse 
any + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + abcd + 0: + +/((?x)(?#))#(?'/ +Failed: error 162 at offset 14: subpattern name expected + +/((?x)(?#))#(?'abc')/I +Capture group count = 2 +Named capture groups: + abc 2 +First code unit = '#' +Subject length lower bound = 1 + +/[[:\\](?<[::]/ +Failed: error 162 at offset 9: subpattern name expected + +/[[:\\](?'abc')[a:]/I +Capture group count = 1 +Named capture groups: + abc 1 +Starting code units: : [ \ +Subject length lower bound = 2 + +"[[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[:::::::::::::::::[[.\xe8Nq\xffq\xff\xe0\x2|||::Nq\xffq\xff\xe0\x6\x2|||::[[[:[::::::[[[[[::::::::[:[[[:[:::[[[[[[[[[[[[[[:::E[[[:[:[[:[:::[[:::E[[[:[:[[:'[:::::E[[[:[::::::[[[:[[[[[[[::E[[[:[::::::[[[:[[[[[[[[:[[::[::::[[:::::::[[:[[[[[[[:[[::[:[[:[~" +Failed: error 106 at offset 353: missing terminating ] for character class + +/()(?(R)0)*+/B +------------------------------------------------------------------ + Bra + CBra 1 + Ket + Braposzero + SBraPos + SCond + Cond recurse any + 0 + Ket + KetRpos + Ket + End +------------------------------------------------------------------ + +/(?R-:(?>abcd<< + 1: >>w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)<< + +/abcd/g,replace=\$1$2\,substitute_literal + XabcdYabcdZ + 2: X\$1$2\Y\$1$2\Z + +/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended + abcDE + 1: aBcBCbcdEdeabAByzDone + +/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended + Hello between wORLD + 2: >hELLO< between >World< + +/abcd/replace=xy\kz,substitute_extended + abcd +Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string + +/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2} + ab + 1: X1X-2 + ac + 1: X-1X2 + ab\=replace=${1:+$1\:$1:$2} + 1: b:b + ac\=replace=${1:+$1\:$1:$2} + 1: c + >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal + 1: 
>>${1:+$1\:$1:$2}<< + +/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2} + ab + 1: XbX2:-2 + ac + 1: X1:-1Xc + +/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}< + a + 1: >$1:{}$$+A< + +/X(b)Y/substitute_extended + XbY\=replace=x${1:+$1\U$1}y + 1: xbBY + XbY\=replace=\Ux${1:+$1$1}y + 1: XBBY + +/a/substitute_extended,replace=${*MARK:+a:b} + a +Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string + +/(abcd)/replace=${1:+xy\kz},substitute_extended + abcd +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/(abcd)/ + abcd\=replace=${1:+xy\kz},substitute_extended +Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string + +/abcd/substitute_extended,replace=>$1< + abcd +Failed: error -49 at offset 3 in replacement: unknown substring + +/abcd/substitute_extended,replace=>xxx${xyz}<<< + abcd +Failed: error -49 at offset 10 in replacement: unknown substring + +/(?J)(?:(?a)|(?b))/replace=<$A> + [a] + 1: [] + [b] + 1: [] +\= Expect error + (a)\=ovector=1 +Failed: error -54 at offset 3 in replacement: requested value is not available + +/(a)|(b)/replace=<$1> +\= Expect error + b +Failed: error -55 at offset 3 in replacement: requested value is not set + +/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 + aaBB + 1: AAbbaa..AAbBaa + +/abcd/replace=wxyz,substitute_matched + abcd + 1: wxyz + pqrs + 0: pqrs + +/abcd/g + >abcd1234abcd5678<\=replace=wxyz,substitute_matched + 2: >wxyz1234wxyz5678< + +/abc/substitute_extended,replace=>\045< + abc + 1: >%< + +/abc/substitute_extended,replace=>\45< + abc + 1: >%< + +/abc/substitute_extended,replace=>\o{45}< + abc + 1: >%< + +/abc/substitute_extended,replace=>\845< + abc +Failed: error -49 at offset 5 in replacement: unknown substring + +/a(b)(c)/substitute_extended,replace=>\1< + abc + 1: >b< + +/a(b)(c)/substitute_extended,replace=>\2< + abc + 1: >c< + 
+/a(b)(c)/substitute_extended,replace=>\3< + abc +Failed: error -49 at offset 3 in replacement: unknown substring + +/a(?b)c/substitute_extended + abc\=replace=>${namED_1}< + 1: >b< + +/a(?b)c/substitute_extended + abc\=replace=>${namedverylongbutperfectlylegalsoyoushouldnthaveaproblem_1}< + 1: >b< + +/abc/substitute_extended + abc\=replace=\a\b\e\f\n\r\t\v\\ + 1: \x07\x08\x1b\x0c\x0a\x0d\x09\x0b\ + +/a(b)c/ + LabcR\=replace=>$&< + 1: L>abc$`< + 1: L>L$'< + 1: L>R$_< + 1: L>LabcR +Overall options: anchored +First code unit = 'o' +Last code unit = '}' +Subject length lower bound = 65535 + +/((p(?'K/ +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?) + +/((p(?'K/no_auto_capture +Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator?) + +/abc/replace=A$3123456789Z + abc +Failed: error -49 at offset 3 in replacement: unknown substring + +/(?a[bc]d + +0 ^ ( + +1 ^ )\Q\E* + +7 ^ ] + +8 ^^ End of pattern + 0: ] + 1: + +/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended +------------------------------------------------------------------ + Bra + \x{8a}++ + f + Alt + ; + T? + *MARK ;.'?`(\x{ea}p + {! 
+ [^!'-*;?Cy] + {1; + CBra 1 + \x08 + Ket + Ket + End +------------------------------------------------------------------ + +# Tests for NULL characters in comments and verb "names" and callouts + +# /A#B\x00C\x0aZ/ +/41 23 42 00 43 0a 5a/Bx,hex +------------------------------------------------------------------ + Bra + AZ + Ket + End +------------------------------------------------------------------ + +# /A+#B\x00C\x0a+/ +/41 2b 23 42 00 43 0a 2b/Bx,hex +------------------------------------------------------------------ + Bra + A++ + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}WC + Z + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}W#X\x{0}Y\x{a}C + Z + Ket + End +------------------------------------------------------------------ + +# /A(?C{X\x00Y})B/ +/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + A + CalloutStr {X\x{0}Y} 5 10 1 + B + Ket + End +------------------------------------------------------------------ + +# /A(?#X\x00Y)B/ +/41 28 3f 23 7b 00 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + AB + Ket + End +------------------------------------------------------------------ + +# Tests for leading comment in extended patterns + +/ (?-x):?/extended + +/ (?-x):?/extended + +/0b 28 3f 2d 78 29 3a/hex,extended + +/#comment +(?-x):?/extended + 
+/(8(*:6^\x09x\xa6l\)6!|\xd0:[^:|)\x09d\Z\d{85*m(?'(?<1!)*\W[*\xff]!!h\w]*\xbe;/alt_bsux,alt_verbnames,allow_empty_class,dollar_endonly,extended,multiline,never_utf,no_dotstar_anchor,no_start_optimize +Failed: error 162 at offset 49: subpattern name expected + +/a|(b)c/replace=>$1<,substitute_unset_empty + cat + 1: c>b$1< +Failed: error -55 at offset 3 in replacement: requested value is not set + cat\=replace=>$1<,substitute_unset_empty + 1: c>$1<,substitute_unset_empty + 1: x>b${2:-xx}< +Failed: error -49 at offset 9 in replacement: unknown substring + cat\=replace=>${2:-xx}<,substitute_unknown_unset + 1: c>xx${X:-xx}<,substitute_unknown_unset + 1: c>xx$X<,substitute_unset_empty + cat + 1: c>b$Y<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>$2<,substitute_unset_empty + cat +Failed: error -49 at offset 3 in replacement: unknown substring + cat\=substitute_unknown_unset + 1: c>9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 +\= Expect no match + abc +--->abc + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + +/aaa/ +\[abc]{10000000000000000000000000000} +** Repeat count too large +\[a]{3} + 0: aaa + +/\[AB]{6000000000000000000000}/expand +** Pattern repeat count too large + +# Hex uses pattern length, not zero-terminated. This tests for overrunning +# the given length of a pattern. + +/'(*U'/hex +Failed: error 160 at offset 3: (*VERB) not recognized or malformed + +/'(*'/hex +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + +/'('/hex +Failed: error 114 at offset 1: missing closing parenthesis + +//hex + +# These tests are here because Perl never allows a back reference in a +# lookbehind. PCRE2 supports some limited cases. 
+ +/([ab])...(?<=\1)z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?|([ab]))...(?<=\1)z/ +Failed: error 125 at offset 13: length of lookbehind assertion is not limited + +/([ab])(\1)...(?<=\2)z/ + aa11az + 0: aa11az + 1: a + 2: a + +/(a\2)(b\1)(?<=\2)/ +Failed: error 125 at offset 10: length of lookbehind assertion is not limited + +/(?[ab])...(?<=\k'A')z/ + a11az + 0: a11az + 1: a + b11bz + 0: b11bz + 1: b +\= Expect no match + b11az +No match + +/(?[ab])...(?<=\k'A')(?)z/dupnames +Failed: error 125 at offset 13: length of lookbehind assertion is not limited + +# Perl does not support \g+n + +/((\g+1X)?([ab]))+/ + aaXbbXa + 0: aaXbbXa + 1: bXa + 2: bX + 3: a + +/ab(?C1)c/auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +/'ab(?C1)c'/hex,auto_callout + abc +--->abc + +0 ^ a + +1 ^^ b + 1 ^ ^ c + +8 ^ ^ End of pattern + 0: abc + +# Perl accepts these, but gives a warning. We can't warn, so give an error. 
+ +/[a-[:digit:]]+/ +Failed: error 150 at offset 12: invalid range in character class + a-a9-a + +/[A-[:digit:]]+/ +Failed: error 150 at offset 12: invalid range in character class + A-A9-A + +/[a-\d]+/ +Failed: error 150 at offset 5: invalid range in character class + a-a9-a + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + Cond recurse any + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?abc)(?(R)xyz)/B +------------------------------------------------------------------ + Bra + CBra 1 + abc + Ket + Cond + 1 Capture ref + xyz + Ket + Ket + End +------------------------------------------------------------------ + +/(?=.*[A-Z])/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/()(?<=(?0))/ +Failed: error 125 at offset 2: length of lookbehind assertion is not limited + +/(?*?\g'0/use_length +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/.>*?\g'0/ +Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number + +/{„Í„ÍÍ„Í{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́ÍÍ„Í{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout + +// +\=get=i00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +** Group name in "get" is too long +\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012 +** Too many characters in named "get" modifiers + +"(?(?C))" +Failed: error 128 at offset 6: atomic assertion expected after (?( or (?(?C) + +/(?(?(?(?(?(?))))))/ +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) + 
+/(?<=(?1))((?s))/anchored + +/(*:ab)*/ +Failed: error 109 at offset 6: quantifier does not follow a repeatable item + +%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout + +/./newline=crlf + \=ph +No match + +/(\x0e00\000000\xc)/replace=\P,substitute_extended + \x0e00\000000\xc +Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string + +//replace=0 + \=offset=7 +Failed: error -33: bad offset value + +/(?<=\G.)/g,replace=+ + abc + 3: a+b+c+ + +".+\QX\E+"B,no_auto_possess +------------------------------------------------------------------ + Bra + Any+ + X+ + Ket + End +------------------------------------------------------------------ + +".+\QX\E+"B,auto_callout,no_auto_possess +------------------------------------------------------------------ + Bra + Callout 255 0 4 + Any+ + Callout 255 4 4 + X+ + Callout 255 8 0 + Ket + End +------------------------------------------------------------------ + +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. 
+ +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I +Capture group count = 108 +Max back reference = 22 +Contains explicit CR or LF match +Subject length lower bound = 1 + +# This checks that new code for handling groups that may match an empty string +# works on a very large number of alternatives. This pattern used to provoke a +# complaint that it was too complicated. + +/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand + +# This one used to compile rubbish instead of a compile error, and then +# behave unpredictably at match time. 
+ +/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ +Failed: error 128 at offset 63: atomic assertion expected after (?( or (?(?C) + .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X + +/[:[:alnum:]-[[a:lnum:]+/ +Failed: error 150 at offset 12: invalid range in character class + +/((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ +Failed: error 128 at offset 11: atomic assertion expected after (?( or (?(?C) + +/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ + +/abcd/auto_callout + abcd\=callout_error=255:2 +--->abcd + +0 ^ a + +1 ^^ b +Failed: error -37: callout error code + +/()(\g+65534)/ +Failed: error 161 at offset 11: subpattern number is too big + +/()(\g+65533)/ +Failed: error 115 at offset 10: reference to non-existent subpattern + +/Á\x00\x00\x00š(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00ÿÿ\x00š(\1{50779}?)J\w2/I +Capture group count = 2 +Max back reference = 2 +First code unit = \xc1 +Last code unit = '2' +Subject length lower bound = 65535 + +/(a)(b)\2\1\1\1\1/I +Capture group count = 2 +Max back reference = 2 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 7 + +/(?a)(?b)\g{b}\g{a}\g{a}\g{a}\g{a}(?xx)(?zz)/I,dupnames +Capture group count = 4 +Max back reference = 4 +Named capture groups: + a 1 + a 3 + b 2 + b 4 +Options: dupnames +First code unit = 'a' +Last code unit = 
'z' +Subject length lower bound = 11 + +// + \=ovector=7777777777 +** Invalid value in "ovector=7777777777" + +# This is here because Perl matches, even though a COMMIT is encountered +# outside of the recursion. + +/(?1)(A(*COMMIT)|B)D/ + BAXBAD +No match + +"(?1){2}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +"(?1){2,4}(a)"B +------------------------------------------------------------------ + Bra + Recurse + Recurse + Brazero + Bra + Bra + Recurse + Ket + Brazero + Bra + Recurse + Ket + Ket + CBra 1 + a + Ket + Ket + End +------------------------------------------------------------------ + +# This test differs from Perl for the first subject. Perl ends up with +# $1 set to 'B'; PCRE2 has it unset (which I think is right). + +/^(?: +(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D) +(Z) +)+$/x + AZB + 0: AZB + 1: + 2: Z + AZBDZ + 0: AZBDZ + 1: B + 2: Z + +# The first of these, when run by Perl, gives the mark 'aa', which is wrong. + +'(?>a(*:aa))b|ac' mark + ac + 0: ac + +'(?:a(*:aa))b|ac' mark + ac + 0: ac + +/(R?){65}/ + (R?){65} + 0: + 1: + +/\[(a)]{60}/expand + aaaa +No match + +/(?abcdabcd + ^^ ( +Callout 1: last capture = 1 + 1: abcd + 2: b + 3: c +--->abcdabcd + ^ ^ ( + 0: abcdabcd + 1: abcd + 2: b + 3: c + +# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any +# pending backtracks in the recursion. + +/^ (?(DEFINE) (..(*ACCEPT)|...) 
) (?1)$/x +\= Expect no match + abc +No match + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc + 0: a +MK: m + +/abc/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match +\= Expect error + xyzabc\=ph +Failed: error -34: bad option value + +/abc/ + xyzabc\=endanchored + 0: abc +\= Expect no match + xyzabcdef\=endanchored +No match +\= Expect error + xyzabc\=ps,endanchored +Failed: error -34: bad option value + +/abc(*ACCEPT)d/endanchored + xyzabc + 0: abc +\= Expect no match + xyzabcdef +No match + +/abc|bcd/endanchored + xyzabcd + 0: bcd +\= Expect no match + xyzabcdef +No match + +/a(*ACCEPT)x|aa/endanchored + aaa + 0: a + +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I +Capture group count = 1 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 2 + +/(a){0}.*bc/sI +Capture group count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'c' +Subject length lower bound = 2 + +# This should be anchored, as the condition is always false and there is +# no alternative branch. + +/(?(VERSION>=999)yes)^bc/I +Capture group count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 2 + +# This should not be anchored. 
+ +/(?(VERSION>=999)yes|no)^bc/I +Capture group count = 0 +Last code unit = 'c' +Subject length lower bound = 4 + +/(*LIMIT_HEAP=0)xxx/I +Capture group count = 0 +Heap limit = 0 +First code unit = 'x' +Last code unit = 'x' +Subject length lower bound = 3 + +/(*LIMIT_HEAP=123/use_length +Failed: error 160 at offset 16: (*VERB) not recognized or malformed + +/(*LIMIT_MATCH=/use_length +Failed: error 160 at offset 14: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_DEPTH=/use_length +Failed: error 160 at offset 21: (*VERB) not recognized or malformed + +/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length +Failed: error 160 at offset 34: (*VERB) not recognized or malformed + +/\d{0,3}(*:abc)(?C1)xxx/callout_info +Callout 1 x + +# ---------------------------------------------------------------------- + +# These are a whole pile of tests that touch lines of code that are not +# used by any other tests (at least when these were created). + +/^a+?x/i,no_start_optimize,no_auto_possess +\= Expect no match + aaa +No match + +/^[^a]{3,}?x/i,no_start_optimize,no_auto_possess +\= Expect no match + bbb +No match + cc +No match + +/^X\S/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\W/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\H/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h/no_start_optimize,no_auto_possess +\= Expect no match + XY +No match + +/^X\V/no_start_optimize,no_auto_possess +\= Expect no match + X\n +No match + +/^X\v/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + +/^X.+?/s,no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\R+?/no_start_optimize,no_auto_possess +\= Expect no match + XX +No match + 
+/^X\H+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\h+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + +/^X\V+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\D+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X9 +No match + +/^X\S+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + X\n +No match + +/^X\W+?/no_start_optimize,no_auto_possess +\= Expect no match + X +No match + XX +No match + +/^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + +/(*CRLF)^X.+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\r\=ps +Partial match: XY\x0d + +/^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/(*BSR_ANYCRLF)^X\R+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\nX +No match + X\n\r\n +No match + X\n\rY +No match + X\n\nY +No match + X\n\x{0c}Y +No match + +/^X\H+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\t +No match + XYY +No match + +/^X\h+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\v+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\D+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY9 +No match + XYY +No match + +/^X\d+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X99 +No match + X9Y +No match + +/^X\S+?Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^X\s+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X\n\n +No match + X\nY +No match + +/^X\W+?Z/no_start_optimize,no_auto_possess +\= Expect no match + X.A +No match + X++ +No match + 
+/^X\w+?Z/no_start_optimize,no_auto_possess +\= Expect no match + Xa. +No match + Xaa +No match + +/^X.{1,3}Z/s,no_start_optimize,no_auto_possess +\= Expect no match + Xa.bd +No match + +/^X\h+Z/no_start_optimize,no_auto_possess +\= Expect no match + X\t\t +No match + X\tY +No match + +/^X\V+Z/no_start_optimize,no_auto_possess +\= Expect no match + XY\n +No match + XYY +No match + +/^(X(*THEN)Y|AB){0}(?1)/ + ABX + 0: AB +\= Expect no match + XAB +No match + +/^(?!A(?C1)B)C/ + ABC\=callout_error=1,no_jit +No match + +/^(?!A(?C1)B)C/no_start_optimize + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +/^(?(?!A(?C1)B)C)/ + ABC\=callout_error=1 +--->ABC + 1 ^^ B +Failed: error -37: callout error code + +# ---------------------------------------------------------------------- + +/[a b c]/BxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/BxxxI +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: extended extended_more +Starting code units: a b c +Subject length lower bound = 1 + +/[a b c]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[ a b c ]/B,extended_more +------------------------------------------------------------------ + Bra + [a-c] + Ket + End +------------------------------------------------------------------ + +/[a b](?xx: [ 12 ] (?-xx:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End 
+------------------------------------------------------------------ + +# Unsetting /x also unsets /xx + +/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B +------------------------------------------------------------------ + Bra + [ ab] + Bra + [12] + Bra + [ 34] + Ket + Ket + y z + Ket + End +------------------------------------------------------------------ + +/(a)(?-n:(b))(c)/nB +------------------------------------------------------------------ + Bra + Bra + a + Ket + Bra + CBra 1 + b + Ket + Ket + Bra + c + Ket + Ket + End +------------------------------------------------------------------ + +# ---------------------------------------------------------------------- +# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. + +/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal +** Unrecognized modifier "\bad_escape_is_literal" + +/\N{\c/IB,bad_escape_is_literal +------------------------------------------------------------------ + Bra + N{c + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Extra options: bad_escape_is_literal +First code unit = 'N' +Last code unit = 'c' +Subject length lower bound = 3 + +/[\j\x{z}\o\gAb\g]/B,bad_escape_is_literal +------------------------------------------------------------------ + Bra + [Abgjoxz{}] + Ket + End +------------------------------------------------------------------ + +/[Q-\N]/B,bad_escape_is_literal +Failed: error 171 at offset 5: \N is not supported in a class + +/[\s-_]/bad_escape_is_literal +Failed: error 150 at offset 4: invalid range in character class + +/[_-\s]/bad_escape_is_literal +Failed: error 150 at offset 5: invalid range in character class + +/[\B\R\X]/B +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\B\R\X]/B,bad_escape_is_literal +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[A-\BP-\RV-\X]/B +Failed: error 107 at offset 4: escape sequence is invalid in character class + 
+/[A-\BP-\RV-\X]/B,bad_escape_is_literal +Failed: error 107 at offset 4: escape sequence is invalid in character class + +# ---------------------------------------------------------------------- + +/a\b(c/literal + a\\b(c + 0: a\b(c + +/a\b(c/literal,caseless + a\\b(c + 0: a\b(c + a\\B(c + 0: a\B(c + +/a\b(c/literal,firstline + XYYa\\b(c + 0: a\b(c +\= Expect no match + X\na\\b(c +No match + +/a\b?c/literal,use_offset_limit + XXXXa\\b?c\=offset_limit=4 + 0: a\b?c +\= Expect no match + XXXXa\\b?c\=offset_limit=3 +No match + +/a\b(c/literal,anchored,endanchored + a\\b(c + 0: a\b(c +\= Expect no match + Xa\\b(c +No match + a\\b(cX +No match + Xa\\b(cX +No match + +//literal,extended +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/a\b(c/literal,auto_callout,no_start_optimize + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/a\b(c/literal,auto_callout + XXXXa\\b(c +--->XXXXa\b(c + +0 ^ a + +1 ^^ \ + +2 ^ ^ b + +3 ^ ^ ( + +4 ^ ^ c + +5 ^ ^ End of pattern + 0: a\b(c + +/(*CR)abc/literal + (*CR)abc + 0: (*CR)abc + +/cat|dog/I,match_word +Capture group count = 0 +Max lookbehind = 1 +Extra options: match_word +Starting code units: c d +Subject length lower bound = 3 + the cat sat + 0: cat +\= Expect no match + caterpillar +No match + snowcat +No match + syndicate +No match + +/(cat)|dog/I,match_line,literal +Capture group count = 0 +Compile options: literal +Overall options: anchored literal +Extra options: match_line +First code unit = '(' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + caterpillar +No match + snowcat +No match + syndicate +No match + +# Confirm that the pcre2_set_optimize API does not have any undesired effect on literal patterns +/(cat)|dog/I,literal,auto_possess_off +Capture group count = 0 +Options: literal +Optimizations: dotstar_anchor,start_optimize +First code 
unit = '(' +Last code unit = 'g' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +/(cat)|dog/I,literal,dotstar_anchor_off +Capture group count = 0 +Options: literal +Optimizations: auto_possess,start_optimize +First code unit = '(' +Last code unit = 'g' +Subject length lower bound = 9 + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +/(cat)|dog/I,literal,optimization_none +Capture group count = 0 +Options: literal +Optimizations: + (cat)|dog + 0: (cat)|dog +\= Expect no match + the cat sat +No match + +# These should result in errors, since it is not permitted to use the +# PCRE2_NO_AUTO_POSSESS and PCRE2_NO_DOTSTAR_ANCHOR options on a literal pattern +/(cat)|dog/literal,no_auto_possess +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/(cat)|dog/literal,no_dotstar_anchor +Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL + +/a whole line/match_line,multiline + Rhubarb \na whole line\n custard + 0: a whole line +\= Expect no match + Not a whole line +No match + +# Perl gets this wrong, failing to capture 'b' in group 1. + +/^(b+|a){1,2}?bc/ + bbc + 0: bbc + 1: b + +# And again here, for the "babc" subject string. + +/^(b*|ba){1,2}?bc/ + babc + 0: babc + 1: ba + bbabc + 0: bbabc + 1: ba + bababc + 0: bababc + 1: ba +\= Expect no match + bababbc +No match + babababc +No match + +/[[:digit:]-a]/ +Failed: error 150 at offset 11: invalid range in character class + +/[[:digit:]-[:print:]]/ +Failed: error 150 at offset 11: invalid range in character class + +/[\d-a]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\H-z]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\d-[:print:]]/ +Failed: error 150 at offset 4: invalid range in character class + +# Perl gets the second of these wrong, giving no match. 
+ +"(?<=(a))\1?b"I +Capture group count = 1 +Max back reference = 1 +Max lookbehind = 1 +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: b + 1: a + aaab + 0: ab + 1: a + +"(?=(a))\1?b"I +Capture group count = 1 +Max back reference = 1 +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + ab + 0: ab + 1: a + aaab + 0: ab + 1: a + +# JIT does not support callout_extra + +/(*NO_JIT)(a+)b/auto_callout,no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^ ^ ) ++13 ^ ^ b +Backtrack +--->aac ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ ++12 ^^ ) ++13 ^^ b +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +Backtrack +No other matching paths +New match attempt +--->aac + +9 ^ ( ++10 ^ a+ +No match + +/(*NO_JIT)a+(?C'XXX')b/no_start_optimize,no_auto_possess +\= Expect no match + aac\=callout_extra +New match attempt +Callout (15): 'XXX' +--->aac + ^ ^ b +Backtrack +Callout (15): 'XXX' +--->aac + ^^ b +Backtrack +No other matching paths +New match attempt +Callout (15): 'XXX' +--->aac + ^^ b +No match + +/\n/firstline + xyz\nabc + 0: \x0a + +/\nabc/firstline + xyz\nabc + 0: \x0aabc + +/\x{0a}abc/firstline,newline=crlf +\= Expect no match + xyz\r\nabc +No match + +/[abc]/firstline +\= Expect no match + \na +No match + +# These tests are matched in test 1 as they are Perl compatible. Here we are +# looking at what does and does not get auto-possessified. + +/(?(DEFINE)(?a?))^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? + Ket + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)(?a?)X)^(?&optional_a)a$/B +------------------------------------------------------------------ + Bra + Cond + Cond false + CBra 1 + a? 
+ Ket + X + Ket + ^ + Recurse + a + $ + Ket + End +------------------------------------------------------------------ + +/^(a?)b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + CBra 1 + a? + Ket + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? + KetRmax + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)++b(?1)a/B +------------------------------------------------------------------ + Bra + ^ + SCBraPos 1 + a? + KetRpos + b + Recurse + a + Ket + End +------------------------------------------------------------------ + +/^(a?)+b/B +------------------------------------------------------------------ + Bra + ^ + SCBra 1 + a? + KetRmax + b + Ket + End +------------------------------------------------------------------ + +/(?=a+)a(a+)++b/B +------------------------------------------------------------------ + Bra + Assert + a++ + Ket + a + CBraPos 1 + a++ + KetRpos + b + Ket + End +------------------------------------------------------------------ + +/(?<=(?=.){4,5}x)/B +------------------------------------------------------------------ + Bra + Assert back + Reverse + Assert + Any + Ket + Assert + Any + Ket + Assert + Any + Ket + Assert + Any + Ket + Brazero + Assert + Any + Ket + x + Ket + Ket + End +------------------------------------------------------------------ + +# Perl behaves differently with these when optimization is turned off + +/a(*PRUNE:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy +No match, mark = X + +/a(*THEN:X)bc|qq/mark,no_start_optimize +\= Expect no match + axy +No match, mark = X + +/(?^x-i)AB/ +Failed: error 194 at offset 4: invalid hyphen in option setting + +/(?^-i)AB/ +Failed: error 194 at offset 3: invalid hyphen in option setting + +/(?x-i-i)/ +Failed: error 194 at offset 5: invalid 
hyphen in option setting + +/(?(?=^))b/I +Capture group count = 0 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + +/(?(?=^)|)b/I +Capture group count = 0 +First code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + +/(?(?=^)|^)b/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b +\= Expect no match + abc +No match + +/(?(1)^|^())/I +Capture group count = 1 +Max back reference = 1 +May match empty string +Compile options: +Overall options: anchored +Subject length lower bound = 0 + +/(?(1)^())b/I +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + +/(?(1)^())+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + +/(?(1)^()|^)+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc +\= Expect no match + abc +No match + +/(?(1)^()|^)*b/I,aftertext +Capture group count = 1 +Max back reference = 1 +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc + abc + 0: b + 0+ c + xbc + 0: b + 0+ c + +/(?(1)^())+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + +/(?(1)^a()|^a)+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + abc + 0: ab + 0+ c +\= Expect no match + bbc +No match + +/(?(1)^|^(a))+b/I,aftertext +Capture group count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + 1: a +\= Expect no match + bbc +No match + +/(?(1)^a()|^a)*b/I,aftertext +Capture 
group count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + bbc + 0: b + 0+ bc + xbc + 0: b + 0+ c + +/a(b)c|xyz/g,allvector,replace=<$0> + abcdefabcpqr\=ovector=4 + 2: defpqr + 0: 6 9 + 1: 7 8 + 2: + 3: + abxyz\=ovector=4 + 1: ab + 0: 2 5 + 1: + 2: + 3: + abcdefxyz\=ovector=4 + 2: def + 0: 6 9 + 1: + 2: + 3: + +/a(b)c|xyz/allvector + abcdef\=ovector=4 + 0: abc + 1: b + 2: + 3: + abxyz\=ovector=4 + 0: xyz + 1: + 2: + 3: + +/a(b)c|xyz/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(2) Old 0 3 "abc" New 0 5 "" + 2(2) Old 6 9 "abc" New 8 13 "" + 2: defpqr + abxyzpqrabcxyz + 1(1) Old 2 5 "xyz" New 2 7 "" + 2(2) Old 8 11 "abc" New 10 15 "" + 3(1) Old 11 14 "xyz" New 15 20 "" + 3: abpqr + 12abc34xyz99abc55\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " STOPPED" + 2: 1234xyz99abc55 + 12abc34xyz99abc55\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 2(1) Old 7 10 "xyz" New 7 12 "" + 3(2) Old 12 15 "abc" New 14 19 "" + 3: 12abc349955 + 12abc34xyz99abc55\=substitute_skip=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " SKIPPED" + 3(2) Old 12 15 "abc" New 14 19 "" + 3: 1234xyz9955 + +/a(b)c|xyz/g,replace=<$0> + abcdefabcpqr + 2: defpqr + abxyzpqrabcxyz + 3: abpqr + 12abc34xyz\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 2 7 "" + 2(1) Old 7 10 "xyz" New 9 14 " STOPPED" + 2: 1234xyz + 12abc34xyz\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 2(1) Old 7 10 "xyz" New 7 12 "" + 2: 12abc34 + +/a(b)c|xyz/replace=<$0> + abcdefabcpqr + 1: defabcpqr + 12abc34xyz\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 2 7 " SKIPPED" + 1: 12abc34xyz + 12abc34xyz\=substitute_stop=1 + 1(2) Old 2 5 "abc" New 2 7 " STOPPED" + 1: 12abc34xyz + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[1]12 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 4 code units are needed + abc +Failed: error -48: no more memory: 4 code units are needed + 
+/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory: 4 code units are needed + abc + 1(2) Old 0 3 "abc" New 0 2 "12" +Failed: error -48: no more memory: 3 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory: 4 code units are needed + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[2]1234 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 5 code units are needed + abc +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[3]1234 + abc\=substitute_skip=1 +Failed: error -48: no more memory: 5 code units are needed + abc +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[4]1234 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 4 "1234 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 4 "1234" +Failed: error -48: no more memory: 5 code units are needed + +/a(b)c/substitute_overflow_length,substitute_callout,replace=[5]1234 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 4 "1234 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 4 "1234" + 1: 1234 + +/a(b)c/substitute_callout,replace=[1]12 + abc\=substitute_skip=1 +Failed: error -48: no more memory + abc +Failed: error -48: no more memory + +/a(b)c/substitute_callout,replace=[2]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory + abc + 1(2) Old 0 3 "abc" New 0 2 "12" +Failed: error -48: no more memory + 
+/a(b)c/substitute_callout,replace=[3]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" +Failed: error -48: no more memory + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/a(b)c/substitute_callout,replace=[4]12 + abc\=substitute_skip=1 + 1(2) Old 0 3 "abc" New 0 2 "12 SKIPPED" + 1: abc + abc + 1(2) Old 0 3 "abc" New 0 2 "12" + 1: 12 + +/abc\rdef/ + abc\ndef +No match + +/abc\rdef\x{0d}xyz/escaped_cr_is_lf + abc\ndef\rxyz + 0: abc\x0adef\x0dxyz +\= Expect no match + abc\ndef\nxyz +No match + +/(?(*ACCEPT)xxx)/ +Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) + +/(?(*atomic:xx)xxx)/ +Failed: error 128 at offset 10: atomic assertion expected after (?( or (?(?C) + +/(?(*script_run:xxx)zzz)/ +Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) + +/foobar/ + the foobar thing\=copy_matched_subject + 0: foobar + the foobar thing\=copy_matched_subject,zero_terminate + 0: foobar + +/foobar/g + the foobar thing foobar again\=copy_matched_subject + 0: foobar + 0: foobar + +/(*:XX)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(*COMMIT:XX)^abc/I +Capture group count = 0 +Compile options: +Overall options: anchored +First code unit = 'a' +Subject length lower bound = 3 + +/(*ACCEPT:XX)^abc/I +Capture group count = 0 +May match empty string +Subject length lower bound = 0 + +/abc/replace=xyz + abc\=null_context + 1: xyz + +/abc/replace=xyz,substitute_callout + abc + 1(1) Old 0 3 "abc" New 0 3 "xyz" + 1: xyz +\= Expect error message + abc\=null_context +** Replacement callouts are not supported with null_context. 
+ +/\[()]{65535}()/expand +Failed: error 197 at offset 131071: too many capturing groups (maximum 65535) + +/\[()]{65535}(?)/expand +Failed: error 197 at offset 131075: too many capturing groups (maximum 65535) + +/a(?:(*ACCEPT))??bc/ + abc + 0: abc + axy + 0: a + +/a(*ACCEPT)??bc/ + abc + 0: abc + axy + 0: a + +/a(*ACCEPT:XX)??bc/mark + abc + 0: abc + axy + 0: a +MK: XX + +/(*:\)?/ +Failed: error 109 at offset 5: quantifier does not follow a repeatable item + +/(*:\Q \E){5}/alt_verbnames +Failed: error 109 at offset 11: quantifier does not follow a repeatable item + +/(?=abc)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 2 + +/(?|(X)|(XY))\1abc/I +Capture group count = 1 +Max back reference = 1 +First code unit = 'X' +Last code unit = 'c' +Subject length lower bound = 4 + +/(?|(a)|(bcde))(c)\2/I +Capture group count = 2 +Max back reference = 2 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(?|(a)|(bcde))(c)\1/I +Capture group count = 2 +Max back reference = 1 +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames +Capture group count = 3 +Max back reference = 2 +Named capture groups: + A 1 + A 3 + B 2 +Options: dupnames +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 3 + +/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames +Capture group count = 3 +Max back reference = 3 +Named capture groups: + A 1 + A 3 + B 2 +Options: dupnames +Starting code units: a b +Last code unit = 'c' +Subject length lower bound = 2 + +/((a|)+)+Z/I +Capture group count = 2 +Starting code units: Z a +Last code unit = 'Z' +Subject length lower bound = 1 + +/((?=a))[abcd]/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + +/A(?:(*ACCEPT))?B/info +Capture group count = 0 +First code unit = 'A' +Subject length lower bound = 1 + 
+/(A(*ACCEPT)??B)C/ + ABC + 0: ABC + 1: AB + AXY + 0: A + 1: A + +/(?<=(?<=a)b)c.*/I +Capture group count = 0 +Max lookbehind = 1 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: c +\= Expect no match + xbc\=ph +No match + +/(?<=ab)c.*/I +Capture group count = 0 +Max lookbehind = 2 +First code unit = 'c' +Subject length lower bound = 1 + abc\=ph +Partial match: c +\= Expect no match + xbc\=ph +No match + +/(?<=a(?<=a|a)c)/I +Capture group count = 0 +Max lookbehind = 2 +May match empty string +Subject length lower bound = 0 + +/(?<=a(?<=a|ba)c)/I +Capture group count = 0 +Max lookbehind = 2 +May match empty string +Subject length lower bound = 0 + +/(?<=(?<=a)b)(?.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 +No match + +/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + 0: word1 word3 word1 word2 word3 word2 word2 word1 word3 + 1: word3 + +/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/ + word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 + 0: word1 word3 word1 word2 word3 word2 word2 word1 word3 + 1: word3 + +/(*plb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 +No match + +/(*naplb:(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 + 0: a + 1: + 2: a + 3: a + +/(?<*(.)..|(.)...)(\1|\2)/ + abcdb\=offset=4 + 0: b + 1: b + 2: + 3: b + abcda\=offset=4 + 0: a + 1: + 2: a + 3: a + +/(*non_atomic_positive_lookahead:ab)/B +------------------------------------------------------------------ + Bra + Non-atomic assert + ab + Ket + Ket + End +------------------------------------------------------------------ + +/(*non_atomic_positive_lookbehind:ab)/B +------------------------------------------------------------------ + Bra + Non-atomic assert back + Reverse + ab + Ket + Ket + End +------------------------------------------------------------------ + +/(*pla:ab+)/B 
+------------------------------------------------------------------ + Bra + Assert + a + b++ + Ket + Ket + End +------------------------------------------------------------------ + +/(*napla:ab+)/B +------------------------------------------------------------------ + Bra + Non-atomic assert + a + b+ + Ket + Ket + End +------------------------------------------------------------------ + +/(*napla:)+/ + +/(*naplb:)+/ + +/(*napla:^x|^y)/I +Capture group count = 0 +May match empty string +Compile options: +Overall options: anchored +Starting code units: x y +Subject length lower bound = 1 + +/(*napla:abc|abd)/I +Capture group count = 0 +May match empty string +First code unit = 'a' +Subject length lower bound = 1 + +/(*napla:a|(.)(*ACCEPT)zz)\1../ + abcd + 0: abc + 1: a + +/(*napla:a(*ACCEPT)zz|(.))\1../ + abcd + 0: bcd + 1: b + +/(*napla:a|(*COMMIT)(.))\1\1/ + aabc + 0: aa + 1: a +\= Expect no match + abbc +No match + +/(*napla:a|(.))\1\1/ + aabc + 0: aa + 1: a + abbc + 0: bb + 1: b + +/(*naplb:ab?c|PQ).../g + abcdefgacxyzPQR123 + 0: def + 0: xyz + 0: R12 + +# ---- + +# Expect error (recursion => not fixed length) +/(\2)((?=(?<=\1)))/ +Failed: error 125 at offset 8: length of lookbehind assertion is not limited + +/c*+(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + abc\=ps + 0: c + ab\=ps + 0: + +/c++(?<=[bc])/ + abc\=ph +Partial match: c + ab\=ph +Partial match: + +/(?<=(?=.(?<=x)))/ + abx + 0: + ab\=ph +Partial match: + bxyz + 0: + xyz + 0: + +/\z/ + abc\=ph +Partial match: + abc\=ps + 0: + +/\Z/ + abc\=ph +Partial match: + abc\=ps + 0: + abc\n\=ph +Partial match: \x0a + abc\n\=ps + 0: + +/(?![ab]).*/ + ab\=ph +Partial match: + +/c*+/ + ab\=ph,offset=2 +Partial match: + +/\A\s*(a|(?:[^`]{28500}){4})/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + +/\A\s*((?:[^`]{28500}){4})/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall 
options: anchored +Subject length lower bound = 65535 + +/\A\s*((?:[^`]{28500}){4}|a)/I +Capture group count = 1 +Max lookbehind = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 1 + a + 0: a + 1: a + +/(?a)(?()b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Cond + 1 Capture ref + b + Ket + CBra 2 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + 1 Capture ref + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(R1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond recurse 1 + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(VERSION=10.3)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/[aA]b[cC]/IB +------------------------------------------------------------------ + Bra + /i a + b + /i c + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'a' (caseless) +Last code unit = 'c' (caseless) +Subject length lower bound = 3 + +/[cc]abcd/I +Capture group count = 0 +First code unit = 'c' +Last code unit = 'd' 
+Subject length lower bound = 5 + +/[Cc]abcd/I +Capture group count = 0 +First code unit = 'C' (caseless) +Last code unit = 'd' +Subject length lower bound = 5 + +/[c]abcd/I +Capture group count = 0 +First code unit = 'c' +Last code unit = 'd' +Subject length lower bound = 5 + +/(?:c|C)abcd/I +Capture group count = 0 +First code unit = 'C' (caseless) +Last code unit = 'd' +Subject length lower bound = 5 + +/(a)?a/I +Capture group count = 1 +First code unit = 'a' +Subject length lower bound = 1 + manm + 0: a + +/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/ + *abc_12345abc + 0: *abc_12345abc + 1: c + 2: 12345abc + +/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/ + *abc_12345abc + 0: *abc_12345abc + 1: c + 2: 12345abc + 3: 12345abc + +/^((\1+)(?C)|\d)+133X$/ + 111133X\=callout_capture +Callout 0: last capture = 2 + 1: 1 + 2: 111 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 3 + 2: 3 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 1 + 2: 11 +--->111133X + ^ ^ | +Callout 0: last capture = 2 + 1: 3 + 2: 3 +--->111133X + ^ ^ | + 0: 111133X + 1: 11 + 2: 11 + +/abc/replace=xyz,substitute_replacement_only + 123abc456 + 1: xyz + +/a(?b)c(?d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only + "abcde-abcde-" + 2: Xb+dZXb+dZ + +/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only + abcdefabcpqr + 1(2) Old 0 3 "abc" New 0 5 "" + 2(2) Old 6 9 "abc" New 5 10 "" + 2: + abxyzpqrabcxyz + 1(1) Old 2 5 "xyz" New 0 5 "" + 2(2) Old 8 11 "abc" New 5 10 "" + 3(1) Old 11 14 "xyz" New 10 15 "" + 3: + 12abc34xyz99abc55\=substitute_stop=2 + 1(2) Old 2 5 "abc" New 0 5 "" + 2(1) Old 7 10 "xyz" New 5 10 " STOPPED" + 2: + 12abc34xyz99abc55\=substitute_skip=1 + 1(2) Old 2 5 "abc" New 0 5 " SKIPPED" + 2(1) Old 7 10 "xyz" New 0 5 "" + 3(2) Old 12 15 "abc" New 5 10 "" + 3: + 12abc34xyz99abc55\=substitute_skip=2 + 1(2) Old 2 5 "abc" New 0 5 "" + 2(1) Old 7 10 "xyz" New 5 10 " SKIPPED" + 3(2) Old 12 15 "abc" New 5 10 "" + 3: + 
+/a(..)d/replace=>$1<,substitute_matched + xyzabcdxyzabcdxyz + 1: xyz>bcbc$1<,substitute_matched + xyzabcdxyzabcdxyz + 2: xyz>bcbcbcbc$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 3: xyz><>bcbc$1<,substitute_matched + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 1: xyz>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 1: xyz>$1< + xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty + 3: xyz><>bcbc" 00 "<).."/hex,mark,no_start_optimize + AB + 0: AB +MK: >\x00< + A\=ph +Partial match, mark=>\x00<: A +\= Expect no match + A +No match, mark = >\x00< + +/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize + AB +--->AB + 1 ^^ . +Latest Mark: >\x00< + 0: AB +MK: >\x00< + +/(?(VERSION=0.0/ +Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition + +# Perl has made \K in lookarounds an error. PCRE2 now rejects as well, unless +# explicitly authorized. + +/(?=a\Kb)ab/ +Failed: error 199 at offset 10: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/(?=a\Kb)ab/allow_lookaround_bsk + ab + 0: b + +/(?!a\Kb)ac/ +Failed: error 199 at offset 10: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/(?!a\Kb)ac/allow_lookaround_bsk + ac + 0: ac + +/^abc(?<=b\Kc)d/ +Failed: error 199 at offset 14: \K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) + +/^abc(?<=b\Kc)d/allow_lookaround_bsk + abcd + 0: cd + +/^abc(?X<\=null_replacement +Failed: error -51: NULL argument passed with non-zero length + +/X+/replace=[20] + >XX<\=null_replacement + 1: >< + +# --------- + +/[Aa]{2}/BI +------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/A{2}/iBI 
+------------------------------------------------------------------ + Bra + /i A{2} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +/[Aa]{2,3}/BI +------------------------------------------------------------------ + Bra + /i A{2} + /i A?+ + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +First code unit = 'A' (caseless) +Last code unit = 'A' (caseless) +Subject length lower bound = 2 + aabcd + 0: aa + +-- + \[X]{-10} +** Zero or negative repeat not allowed + +# Check imposition of maximum by match_data_create(). + +/abcd/ + abcd\=ovector=65536 + 0: abcd + +# Use recurse to test \K and Mark in atomic scope. +/(?>this line\s*((?R)|)\K)/ + this line this line this line + 0: + 1: this line this line + +/(?>this line\s*((?R)|)(*MARK:A))/ + this line this line this line + 0: this line this line this line + 1: this line this line + +# Check use of NULL pattern with zero length. 
+ +//null_pattern,use_length + abc + 0: + +//null_pattern +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +/bad null pattern/null_pattern,use_length +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +/bad null pattern/null_pattern +Failed: error 116 at offset 0: pattern passed as NULL with non-zero length + +# -------- Variable length lookbehinds -------- +/12345(?<=\d{1,256})X/ +Failed: error 200 at offset 5: branch too long in variable-length lookbehind assertion + +/(?<=(\d{1,256}))X/max_varlookbehind=256 + 12345XYZ + 0: X + 1: 12345 + +/12345(?<=a?bc)X/max_varlookbehind=0 +Failed: error 200 at offset 5: branch too long in variable-length lookbehind assertion + +/12345(?<=abc)X/max_varlookbehind=0 + +/(?a)|(?Pb))(?P=same))+/g,dupnames + bbbaaabaabb + 0: bbbaaaba + 1: a + 2: b + 0: bb + 1: + 2: b + +# -------- + +/ +/anchored, firstline + \x0a + 0: \x0a + +/ +/anchored,firstline,no_start_optimize + \x0a + 0: \x0a + +/ +/firstline + \x0a + 0: \x0a + abc\x0adef + 0: \x0a + +/|a(?0)/endanchored + aaaa + 0: aaaa + +/A +/extended + +/(*ACCEPT)+/B,auto_callout +------------------------------------------------------------------ + Bra + Callout 255 0 10 + SBra + *ACCEPT + KetRmax + Callout 255 10 0 + Ket + End +------------------------------------------------------------------ + +/a\z/ + a + 0: a + a\=noteol + 0: a + +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?bd + +0 ^ a? 
+ +2 ^ (?= + +5 ^ b + +8 ^ ) + +9 ^ d ++10 ^^ End of pattern + 0: d + +/a?(?=bc|)\bd/I +Capture group count = 0 +Max lookbehind = 1 +Starting code units: a d +Last code unit = 'd' +Subject length lower bound = 1 + bd +No match + +/(?0)/ + abc\=disable_recurseloop_check,match_limit=100 +Failed: error -47: match limit exceeded + +/(a(?1)z||(?1)++)$/ + abcd\=disable_recurseloop_check + 0: + 1: + +/(((?<=123?456456|ABC)))(?<=\2)../ + ABCDEFG + 0: DE + 1: + 2: + 12345645678910 + 0: 78 + 1: + 2: + +# This test is crashing Perl 5.38.2. + +/[^\S\W]{6}/ + .abc def.. +No match + +/(*MARK:a/y_)/debug +** Unrecognized modifier 'y' in modifier string "y_)/debug" + +//i,sr +** Unrecognized modifier "sr" + +# The behaviour of these tests is different from Perl because PCRE2 doesn't +# recognize \Q or \E within a quantifier, so these examples are not treated +# as quantifiers. Subsequent processing of the string removes the escapes. + +/a{\Q1\E,2}/ + xa{1,2}x + 0: a{1,2} +\= Expect no match + xaax +No match + +/a{\E1,2}/ + xa{1,2}x + 0: a{1,2} +\= Expect no match + xaax +No match + +# -------------- + +/(?<=|b?)./B +------------------------------------------------------------------ + Bra + Assert back + Alt + VReverse + b? + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?=|b?)./B +------------------------------------------------------------------ + Bra + Assert + Alt + b?+ + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?>|b?)./B +------------------------------------------------------------------ + Bra + Once + Alt + b?+ + Ket + Any + Ket + End +------------------------------------------------------------------ + +/(?<=xy|a.b?|cd)/B +------------------------------------------------------------------ + Bra + Assert back + Reverse + xy + Alt + VReverse + a + Any + b? 
+ Alt + Reverse + cd + Ket + Ket + End +------------------------------------------------------------------ + +# Tests for scan substring, a non Perl feature of PCRE2 + +# Parse errors first + +/(*scs:/ +Failed: error 114 at offset 6: missing closing parenthesis + +/(*scan_substring:(/ +Failed: error 114 at offset 18: missing closing parenthesis + +/(*scs:('name'/ +Failed: error 114 at offset 13: missing closing parenthesis + +/(*scs:(1)a|b)/ +Failed: error 115 at offset 7: reference to non-existent subpattern + +/(*scs:(0)a)/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(*scan_substring:(1)a|b)/ +Failed: error 115 at offset 18: reference to non-existent subpattern + +/(*scs:()a|b)/ +Failed: error 115 at offset 8: reference to non-existent subpattern + +/(*scan_substring:()a|b)/ +Failed: error 115 at offset 19: reference to non-existent subpattern + +/()(*scs:(1)+a)/ +Failed: error 109 at offset 11: quantifier does not follow a repeatable item + +/()(*scs:(1,1,1,1,1,1,1,1,2))/ +Failed: error 115 at offset 25: reference to non-existent subpattern + +/()()(*scs:(1,2,1,2,1,2,2,'XYZ'))/ +Failed: error 115 at offset 26: reference to non-existent subpattern + +# Tests for iterating scan_substring + +/(a)(*scs:(1)b)*c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)*?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)*+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End 
+------------------------------------------------------------------ + +/(a)(*scs:(1)b)+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)+?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)++c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)??c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b)?+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3}c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + 
c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3,5}?c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Braminzero + Bra + Scan substring + 1 Capture ref + b + Ket + Braminzero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(a)(*scs:(1)b){3,}+c/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Once + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Scan substring + 1 Capture ref + b + Ket + Brazero + Scan substring + 1 Capture ref + b + Ket + Ket + c + Ket + End +------------------------------------------------------------------ + +/(\w++)=(?(*scs:(1)(abc))pqr|xyz)(\w++)/ +Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) + +# Tests for scan_substring + +/([a-z]++)(*scs:(1)(stx)|(ne))(.)/B +------------------------------------------------------------------ + Bra + CBra 1 + [a-z]++ + Ket + Scan substring + 1 Capture ref + CBra 2 + stx + Ket + Alt + CBra 3 + ne + Ket + Ket + CBra 4 + Any + Ket + Ket + End +------------------------------------------------------------------ + ##string##next!## + 0: next! + 1: next + 2: + 3: ne + 4: ! 
+ __aastxaa:__ + 0: stxaa: + 1: stxaa + 2: stx + 3: + 4: : + __abababab:__ +No match + +/(?[a-z]++)##(*scan_substring:('XX').*(..)$)\2/B +------------------------------------------------------------------ + Bra + CBra 1 + [a-z]++ + Ket + ## + Scan substring + 1 Capture ref + Any* + CBra 2 + Any + Any + Ket + $ + Ket + \2 + Ket + End +------------------------------------------------------------------ + ##abcd##abcd##cd## + 0: abcd##cd + 1: abcd + 2: cd + ##abcd##abcd##abcd## +No match + +/([a-z])([a-z]++)(#+)(*scs:(2)(ab.))/ + xab## +No match + yabc### + 0: yabc### + 1: y + 2: abc + 3: ### + 4: abc + zababc#### + 0: zababc#### + 1: z + 2: ababc + 3: #### + 4: aba + +/(?:(?[a-z]++)|(?[0-9]++)|$)(*scan_substring:('YYY')((?.).*\k$))/dupnames + $$abacd$$112345$$abca$$ + 0: abca + 1: abca + 2: + 3: abca + 4: a + $$abcdeaf$$1234567819$$123456781$$ + 0: 123456781 + 1: + 2: 123456781 + 3: 123456781 + 4: 1 + +/([a-zA-Z]+)(*scs:(1).*?(?[A-Z]+)(*scan_substring:('ABC').*(.)\3))#+/ + ##abABCtuTUVXz##abCDEFGxyCDEEFGhi## + 0: abCDEFGxyCDEEFGhi## + 1: abCDEFGxyCDEEFGhi + 2: CDEEFG + 3: E + ##abAABCtuTUVXXz!!abCDEFGxyCDEFGGhi## + 0: abCDEFGxyCDEFGGhi## + 1: abCDEFGxyCDEFGGhi + 2: CDEFGG + 3: G + +/([a-zA-Z]+)(*scs:(1)(xy|ab(*ACCEPT)cd))/B +------------------------------------------------------------------ + Bra + CBra 1 + [A-Za-z]+ + Ket + Scan substring + 1 Capture ref + CBra 2 + xy + Alt + ab + Close 2 + *ASSERT_ACCEPT + cd + Ket + Ket + Ket + End +------------------------------------------------------------------ + ##cdefgh##cdeabxy## + 0: abxy + 1: abxy + 2: ab + +/(?[a-zA-Z]+)(*scs:('AA')(ab(*ACCEPT)cd|xy))/B +------------------------------------------------------------------ + Bra + CBra 1 + [A-Za-z]+ + Ket + Scan substring + 1 Capture ref + CBra 2 + ab + Close 2 + *ASSERT_ACCEPT + cd + Alt + xy + Ket + Ket + Ket + End +------------------------------------------------------------------ + ##cdefgh##cdeabxy## + 0: abxy + 1: abxy + 2: ab + +/([a-z]++)##(*scs:(1)(abc))?!/ + 
##xyz##abc##! + 0: abc##! + 1: abc + 2: abc + ##xyz##! + 0: xyz##! + 1: xyz + ##xyz## +No match + +/([a-z]++)##(*scs:(1)(abc))??(?(2)!|:)/ + ##abc##abc##! + 0: abc##! + 1: abc + 2: abc + ##abc##xyz##: + 0: xyz##: + 1: xyz + ##abc### +No match + +/([a-z]++)##(*scs:(1)(abc)|xyz){8}(?(2)!|:)/ + ##abc##abc##! + 0: abc##! + 1: abc + 2: abc + ##abc##xyz##: + 0: xyz##: + 1: xyz + ##nnn##! +No match + ##nnn##: +No match + +/[A-Z]{3}([A-Z]++)#(*scs:(1)(?<=BC)XY)#/ + ABCXY##AKCXY## + 0: ABCXY## + 1: XY + +/()(\w++)=(*scs:(2)(?=abc))(\w++)/ + xabcx=pqr. + 0: abcx=pqr + 1: + 2: abcx + 3: pqr + +/(\d++)(*scs:(1)\d+\z)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/(\d++)(*scs:(1)\d+\Z)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/(\d++)(*scs:(1)\d+$)(\w+)/ + X123XYZ + 0: 123XYZ + 1: 123 + 2: XYZ + +/([a-z]{2})[a-z](*scs:(1)(.*?))\2$/ + abcab + 0: abcab + 1: ab + 2: ab + abcabc + 0: bcabc + 1: bc + 2: bc + +/^(([a-z]([a-z]*+))(*scs:(2).(?=(?1)|$)\3)|#){5}/ + abcdefg#hijk#! +No match + abcdefg#hijk#lmnopqr# + 0: abcdefg#hijk#lmnopqr + 1: lmnopqr + 2: lmnopqr + 3: mnopqr + +/(*scs:(1)a)(a)|x/ + a +No match + x + 0: x + +/(*scs:()a)(?a)(?b)(?c)(?d)|x/dupnames + abcd +No match + x + 0: x + +/(*scs:(1)a)?(a)/ + b +No match + a + 0: a + 1: a + +/(*scs:(1)a)??(a)/ + b +No match + a + 0: a + 1: a + +# Custom backtrack, goes back n - 1 characters in the input (n=8) +/x(?|(*scs:(1)(?<=(.)))|()){8}/ + abcdefghx + 0: x + 1: c + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE)x)).+|(.+)/ + abcdef + 0: bcdef + 1: + 2: + 3: bcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr)x)).+|(.+)/mark + abcdef + 0: bcdef + 1: + 2: + 3: bcdef + +/(a)(b)(*scs:(2)(*scs:(1)a(*PRUNE:markstr))).+|(.+)/mark + abcdef + 0: abcdef + 1: a + 2: b +MK: markstr + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT)x)).+|(.+)/ + abcdef +No match + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr)x)).+|(.+)/mark + abcdef +No match, mark = markstr + +/(a)(b)(*scs:(2)(*scs:(1)a(*COMMIT:markstr))).+|(.+)/mark + abcdef + 0: abcdef + 1: a + 2: b +MK: 
markstr + +/(abc)(def)(*scs:(1)(*scs:(2)de(*SKIP)x)).+|(.+)/ + abcdefghi + 0: fghi + 1: + 2: + 3: fghi + +/(abc)(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(.+)/ + abcdefghi + 0: bcdefghi + 1: + 2: + 3: bcdefghi + +/(?<=(abc))(def)(*scs:(2)(*scs:(1)(*SKIP)x)).+|(ef.+)/ + abcdefghi + 0: efghi + 1: + 2: + 3: efghi + +/(abc)(def)(*scs:(2)(?:(*scs:(1)abc(*SKIP:notfound)x|abcd|(abc)))).+/ + abcdefghi + 0: abcdefghi + 1: abc + 2: def + 3: abc + +/(abc)(def)(*MARK:markstr)(*scs:(2)(?:(*scs:(1)abc(*SKIP:markstr)x))).+|(.+)/ + abcdefghi + 0: ghi + 1: + 2: + 3: ghi + +/^([a-z]++)(?:((?6))|((?7))|((?8))|(#))(?(DEFINE)((*scs:(1)abc(*PRUNE)d))((*scs:(1)abc(*COMMIT)e))((*scs:(1)abc(*SKIP)f)))/ + abcd# + 0: abcd + 1: abcd + 2: + abce# + 0: abce + 1: abce + 2: + 3: + abcf# + 0: abcf + 1: abcf + 2: + 3: + 4: + abc# + 0: abc# + 1: abc + 2: + 3: + 4: + 5: # + +/\b(\w++)(*scs:(1)^)/ + sausages and mash + 0: sausages + 1: sausages +\= Expect no match + !sausages and mash +No match + +/(\b\w{3,}+\b)(*scs:(1)\W*+(?:((.)\W*+(?2)\W*+\3|)|((.)\W*+(?4)\W*+\5|\W*+.\W*+))\W*+$)/ig + ipsum lorem revel level able was I ere I saw Elba + 0: level + 1: level + 2: + 3: + 4: level + 5: l + 0: ere + 1: ere + 2: + 3: + 4: ere + 5: e + +/(?:(?'A'a)|(?b))(*scs:('A')b)c/dupnames + abc + 0: bc + 1: + 2: b + +# Relative reference +/(xyz)(abc)(*scs:(-1)abc)(*scs:(-2)\1)/ + >xyzabc< + 0: xyzabc + 1: xyz + 2: abc + +/^([a-z]++)#(*scs:(1)a|ab|abc|abcd|abcde|abcdef|(abcdefg))\2/ + abcdefg#abcdefg + 0: abcdefg#abcdefg + 1: abcdefg + 2: abcdefg + +/^([a-z]++)(*scs:(1)(a+)(*THEN)b|(a+)(*THEN)c|(aa))/ + aaaax + 0: aaaax + 1: aaaax + 2: + 3: + 4: aa + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b)|(a+)(*THEN)c|(aa))/ + aaaax + 0: aaaax + 1: aaaax + 2: + 3: + 4: + 5: aa + +/^([a-z]++)(*scs:(1)((a+)(*THEN)b))?/ + aaaax + 0: aaaax + 1: aaaax + +/^([a-z]++)(*scs:(1)(abc|(a+)(*THEN)b))?/ + aaaax + 0: aaaax + 1: aaaax + +/^(?:(.){20,30}#|([a-z]++)(*scs:(1)(a+)(*THEN)b){20,30}#|(.){20,30}!)/ + aaaaaaaaaaaaaaaaaaaaaaaaab! 
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaab! + 1: + 2: + 3: + 4: b + +# List of captures + +/(?:(abc)|(?def)|ghi)(*scs:(1,'PP').(.))/B +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 2 + def + Ket + Alt + ghi + Ket + Scan substring + 1 Capture ref + 2 Capture ref + Any + CBra 3 + Any + Ket + Ket + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: abc + 2: + 3: b + def + 0: def + 1: + 2: def + 3: e + ghi +No match + +/(?:(?abc)|(?def)|(ghi)|(?'NN'jkl)|mno)(*scs:('MM',3,).(.))/B,dupnames +------------------------------------------------------------------ + Bra + Bra + CBra 1 + abc + Ket + Alt + CBra 2 + def + Ket + Alt + CBra 3 + ghi + Ket + Alt + CBra 4 + jkl + Ket + Alt + mno + Ket + Scan substring + Capture ref 2 + 3 Capture ref + 4 Capture ref + Any + CBra 5 + Any + Ket + Ket + Ket + End +------------------------------------------------------------------ + abc + 0: abc + 1: abc + 2: + 3: + 4: + 5: b + def + 0: def + 1: + 2: def + 3: + 4: + 5: e + ghi + 0: ghi + 1: + 2: + 3: ghi + 4: + 5: h + jkl + 0: jkl + 1: + 2: + 3: + 4: jkl + 5: k + mno +No match + +/f(?:(*scs:(+1,+2)(?<=(.)))|()){16}/ + 1234567890abcdef + 0: f + 1: 2 + 2: + 1ffffffffffffff + 0: f + 1: 1 + 2: + +/(?a)(*scan_substring:(1,'AA',1,)a)b/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Scan substring + 1 Capture ref + 1 Capture ref + 1 Capture ref + 1 Capture ref + a + Ket + b + Ket + End +------------------------------------------------------------------ + ab + 0: ab + 1: a + ac +No match + +/()()()(?<=ab(*scs:(1,2,3))cd)xyz/ + abcdxyz + 0: xyz + 1: + 2: + 3: + +/()()()(?<=ab(*ACCEPT)(*scs:(1,2,3))cd|efg)xyz/ + abxyz + 0: xyz + 1: + 2: + 3: + efgxyz + 0: xyz + 1: + 2: + 3: + +# Tests for pcre2_set_optimize() + +/abc/I,optimization_none +Capture group count = 0 +Optimizations: + +/abc/I,optimization_none,auto_possess +Capture group count = 0 
+Optimizations: auto_possess + +/abc/I,optimization_none,dotstar_anchor,auto_possess +Capture group count = 0 +Optimizations: auto_possess,dotstar_anchor + +/abc/I,optimization_none,start_optimize +Capture group count = 0 +Optimizations: start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,dotstar_anchor_off,optimization_full +Capture group count = 0 +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +# If pcre2_set_optimize() is used to turn on some optimization, but at the same time, +# the compile options word turns it off... the compile options word "wins": + +/abc/I,no_auto_possess,auto_possess +Capture group count = 0 +Options: no_auto_possess +Optimizations: dotstar_anchor,start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,no_dotstar_anchor,dotstar_anchor +Capture group count = 0 +Options: no_dotstar_anchor +Optimizations: auto_possess,start_optimize +First code unit = 'a' +Last code unit = 'c' +Subject length lower bound = 3 + +/abc/I,no_start_optimize,start_optimize +Capture group count = 0 +Options: no_start_optimize +Optimizations: auto_possess,dotstar_anchor + +# -------------- + +# larger than GROUP_MAX, smaller than INT_MAX +/a\800000b/ +Failed: error 161 at offset 8: subpattern number is too big + +# coming up on INT_MAX... 
(used to succeed with \8 being literal 8) +/a\800000000b/ +Failed: error 161 at offset 11: subpattern number is too big + +# over INT_MAX (used to succeed with \8 being literal 8) +/a\8000000000b/ +Failed: error 161 at offset 12: subpattern number is too big + +# -------------- + +# no_bs0 + +/a\0b\x00c\00d/ + a\x{00}b\x{00}c\x{00}d + 0: a\x00b\x00c\x00d + +/a\0b/no_bs0 +Failed: error 198 at offset 3: octal digit missing after \0 (PCRE2_EXTRA_NO_BS0 is set) + +/b\x00c\00d/no_bs0 + b\x{00}c\x{00}d + 0: b\x00c\x00d + +/abc/substitute_extended + abc\=replace=a\0b\x00c\00d + 1: a\x00b\x00c\x00d + +/abc/substitute_extended,no_bs0 + abc\=replace=a\0b +Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string + abc\=replace=b\x00c\00d + 1: b\x00c\x00d + +# python_octal + +/\0-\00-\01-\012-\0123-\123-\1234/ + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + +/\1/ +Failed: error 115 at offset 1: reference to non-existent subpattern + +/\12/ + \o{12} + 0: \x0a + +/abc/substitute_extended + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + 1: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + abc\=replace=\1 +Failed: error -49 at offset 2 in replacement: unknown substring + abc\=replace=\12 + 1: \x0a + +/\0-\00-\01-\012-\0123-\123-\1234/python_octal + \x00-\x00-\x01-\o{12}-\o{12}3-\o{123}-\o{123}4 + 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + +/\1/python_octal +Failed: error 115 at offset 1: reference to non-existent subpattern + +/\12/python_octal +Failed: error 115 at offset 2: reference to non-existent subpattern + +/abc/substitute_extended,python_octal + abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 + 1: \x00-\x00-\x01-\x0a-\x0a3-S-S4 + abc\=replace=\1 +Failed: error -49 at offset 2 in replacement: unknown substring + abc\=replace=\12 +Failed: error -49 at offset 3 in replacement: unknown substring + +# -------------- + +/a(?C)b/ + abc +--->abc + 0 ^^ b + 0: ab + abc\=callout_none + 0: ab + +/a(?C)b/never_callout +Failed: error 203 at 
offset 3: using callouts is disabled by the application + +# -------------- + +# EXTENDED CHARACTER CLASSES (UTS#18) + +/[a[]/ + [ + 0: [ + +/[a[]/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a[B]/alt_extended_class +Failed: error 212 at offset 5: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a[B]]C/B,alt_extended_class +------------------------------------------------------------------ + Bra + [Ba] + C + Ket + End +------------------------------------------------------------------ + aC + 0: aC + BC + 0: BC +\= Expect no match + [C +No match + +/[[A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + [ +No match + ] +No match + +/[[A]||[B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + C +No match + +/[[^A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^A] + Ket + End +------------------------------------------------------------------ + B + 0: B + C + 0: C +\= Expect no match + A +No match + +/[^[A][B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^AB] + Ket + End +------------------------------------------------------------------ + C + 0: C +\= Expect no match + A +No match + B +No match + +/[^[A]&&[B]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B + C + 0: C + +/[[AC]||[BC]]/B,alt_extended_class 
+------------------------------------------------------------------ + Bra + [A-C] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B + C + 0: C +\= Expect no match + D +No match + +/[[AC]&&[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [C] + Ket + End +------------------------------------------------------------------ + C + 0: C +\= Expect no match + A +No match + B +No match + D +No match + +/[[AC]--[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + C +No match + D +No match + +/[[AC]~~[BC]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AB] + Ket + End +------------------------------------------------------------------ + A + 0: A + B + 0: B +\= Expect no match + C +No match + D +No match + +/[A[]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A\]] + Ket + End +------------------------------------------------------------------ + A + 0: A + ] + 0: ] +\= Expect no match + [ +No match + +/[A[^]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\]] + Ket + End +------------------------------------------------------------------ + A + 0: A + [ + 0: [ + C + 0: C +\= Expect no match + ] +No match + +/[A[]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + ] +No match + [ +No match + +/[A[^]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End 
+------------------------------------------------------------------ + A + 0: A + C + 0: C + [ + 0: [ + ] + 0: ] + +/[A-C--B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [AC] + Ket + End +------------------------------------------------------------------ + A + 0: A + C + 0: C +\= Expect no match + B +No match + +/[^A-C--B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^AC] + Ket + End +------------------------------------------------------------------ + B + 0: B +\= Expect no match + A +No match + C +No match + +/[[\d\D]--b]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^b] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + b +No match + +/[\dAC-E[:space:]&&[^z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\x09-\x0d 0-9AC-E] + Ket + End +------------------------------------------------------------------ + 0 + 0: 0 + A + 0: A + C + 0: C + D + 0: D + E + 0: E + \t + 0: \x09 +\= Expect no match + B +No match + F +No match + ; +No match + +/[z||[^\dAC-E[:space:]]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [^\x09-\x0d 0-9AC-E] + Ket + End +------------------------------------------------------------------ + z + 0: z + B + 0: B + F + 0: F + ; + 0: ; +\= Expect no match + 0 +No match + A +No match + C +No match + D +No match + E +No match + \t +No match + +/[ab||cd]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[[a]b||[c]d]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End 
+------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[a[b]||c[d]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [a-d] + Ket + End +------------------------------------------------------------------ + a + 0: a + c + 0: c +\= Expect no match + e +No match + +/[-&&-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-] + Ket + End +------------------------------------------------------------------ + - + 0: - +\= Expect no match + a +No match + +/[a-&&-a]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[-a&&a-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[[a]-&&-[a]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/[-[a]&&[a]-]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-a] + Ket + End +------------------------------------------------------------------ + - + 0: - + a + 0: a +\= Expect no match + b +No match + +/(?xx:[ ^ a[ ^ b] ])/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + [b] + Ket + Ket + End +------------------------------------------------------------------ + b + 0: b +\= Expect no match + A +No match + a +No match + c +No match + +/[ ^ a[ ^ b] ]/B,alt_extended_class 
+------------------------------------------------------------------ + Bra + [ ^ab] + Ket + End +------------------------------------------------------------------ + \x20 + 0: + ^ + 0: ^ + a + 0: a + b + 0: b +\= Expect no match + c +No match + +/[a-c--b]+/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]++ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + a + 0: a +\= Expect no match + b +No match + +/[a-c--b]{2,3}/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]{2,3}+ + Ket + End +------------------------------------------------------------------ + ac + 0: ac + cac + 0: cac +\= Expect no match + a +No match + bb +No match + +/x[a-c--b]+y/B,alt_extended_class +------------------------------------------------------------------ + Bra + x + [ac]++ + y + Ket + End +------------------------------------------------------------------ + xacy + 0: xacy + xaay + 0: xaay + xay + 0: xay +\= Expect no match + zacy +No match + xacz +No match + xy +No match + xby +No match + +/[A--B--C--D]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + +/[A--A--A]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + B +No match + +/[[A--A]--A]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + A +No match + B +No match + +/[A--[A--A]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End 
+------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + +/[A--^B]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [A] + Ket + End +------------------------------------------------------------------ + A + 0: A +\= Expect no match + B +No match + ^ +No match + z +No match + +/([a-z--n])\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + [a-mo-z] + Ket + \1 + Ket + End +------------------------------------------------------------------ + aa + 0: aa + 1: a + zz + 0: zz + 1: z +\= Expect no match + az +No match + nn +No match + +/(x[a-z--n]y)\1/B,alt_extended_class +------------------------------------------------------------------ + Bra + CBra 1 + x + [a-mo-z] + y + Ket + \1 + Ket + End +------------------------------------------------------------------ + xayxay + 0: xayxay + 1: xay + xzyxzy + 0: xzyxzy + 1: xzy +\= Expect no match + xnyxny +No match + +/(?:_\1|([a-z--n])){2}/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + Ket + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + Ket + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z +\= Expect no match + a_z +No match + n_n +No match + +/(?:_\1|([a-z--n]))+/B,alt_extended_class +------------------------------------------------------------------ + Bra + Bra + _ + \1 + Alt + CBra 1 + [a-mo-z] + Ket + KetRmax + Ket + End +------------------------------------------------------------------ + a_a + 0: a_a + 1: a + z_z + 0: z_z + 1: z + a_partial + 0: a + 1: a +\= Expect no match + n_n +No match + +/[\d-[z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 
+ - + 0: - + z + 0: z + +/[\d-||z]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[z[\d-]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-0-9z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[1-[z]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[1-||z]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[z[1-]]/B,alt_extended_class +------------------------------------------------------------------ + Bra + [\-1z] + Ket + End +------------------------------------------------------------------ + 1 + 0: 1 + - + 0: - + z + 0: z + +/[a--/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a--a/alt_extended_class +Failed: error 106 at offset 5: missing terminating ] for character class + +/[a--[a/alt_extended_class +Failed: error 106 at offset 6: missing terminating ] for character class + +/[a--[a]/alt_extended_class +Failed: error 212 at offset 7: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a--[a]--/alt_extended_class +Failed: error 212 at offset 9: missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS) + +/[a--]/alt_extended_class +Failed: error 210 at offset 5: expected operand after operator in extended character class + +/[--a]/alt_extended_class +Failed: error 209 at 
offset 3: unexpected operator in extended character class (no preceding operand) + +/[^--a]/alt_extended_class +Failed: error 209 at offset 4: unexpected operator in extended character class (no preceding operand) + +/[--]/alt_extended_class +Failed: error 209 at offset 3: unexpected operator in extended character class (no preceding operand) + +/[a---b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a----b]/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a&&&b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a|||b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~b]/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~~b]/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a~~/alt_extended_class +Failed: error 106 at offset 4: missing terminating ] for character class + +/[a~~~/alt_extended_class +Failed: error 208 at offset 5: invalid operator in extended character class + +/[a~~~~/alt_extended_class +Failed: error 208 at offset 6: invalid operator in extended character class + +/[a||b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a||b~~c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a~~b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a--b~~c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a--b&&c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify 
operator precedence in extended character class + +/[a||b--c]/alt_extended_class +Failed: error 211 at offset 7: square brackets needed to clarify operator precedence in extended character class + +/[a||[b--c]]/alt_extended_class + a + 0: a + b + 0: b +\= Expect no match + c +No match + +/[\d-z]/B,alt_extended_class +Failed: error 150 at offset 4: invalid range in character class + +/[z-\d]/B,alt_extended_class +Failed: error 150 at offset 5: invalid range in character class + +/[abc -- b]+/B,alt_extended_class +------------------------------------------------------------------ + Bra + [ac]++ + Ket + End +------------------------------------------------------------------ + acacbac + 0: acac + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a]]]]]]]]]]]]]]]/alt_extended_class + a + 0: a +\= Expect no match + b +No match + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[z]&&a]]]]]]]]]]]]]]]/alt_extended_class +Failed: error 207 at offset 115: extended character class nesting is too deep + +/[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a[z]]]]]]]]]]]]]]]]/alt_extended_class +Failed: error 207 at offset 118: extended character class nesting is too deep + +/[z&/alt_extended_class +Failed: error 106 at offset 3: missing terminating ] for character class + +/[[^]~~[^]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + [] + Ket + End +------------------------------------------------------------------ +\= Expect no match + a +No match + +/[^[[^]~~[^]]]/B,alt_extended_class,allow_empty_class +------------------------------------------------------------------ + Bra + AllAny + Ket + End +------------------------------------------------------------------ + a + 0: a + +# -------------- + +# EXTENDED CHARACTER 
CLASSES (Perl) + +# allow-empty-class does nothing inside (?[...]) +/(?[ []] ])/B,allow_empty_class +------------------------------------------------------------------ + Bra + [\]] + Ket + End +------------------------------------------------------------------ + ] + 0: ] + +# bad-escape-is-literal does nothing inside (?[...]) +/[ \j ]/ +Failed: error 103 at offset 3: unrecognized character follows \ + +/[ /\ +Failed: error 101 at offset 3: \ at end of pattern + +/(?[ \j ])/ +Failed: error 103 at offset 5: unrecognized character follows \ + +/(?[ /\ +Failed: error 101 at offset 5: \ at end of pattern + +/[ \j ]/bad_escape_is_literal + j + 0: j +\= Expect no match + k +No match + +/[ /\bad_escape_is_literal +Failed: error 106 at offset 3: missing terminating ] for character class + +/(?[ \j ])/bad_escape_is_literal +Failed: error 103 at offset 5: unrecognized character follows \ + +/(?[ /\bad_escape_is_literal +Failed: error 101 at offset 5: \ at end of pattern + +/(?[ [\j] ])/bad_escape_is_literal +Failed: error 103 at offset 6: unrecognized character follows \ + +/(?[ (\j) ])/bad_escape_is_literal +Failed: error 103 at offset 6: unrecognized character follows \ + +# We can't test error cases in testinput1 + +/(?[])/ +Failed: error 214 at offset 4: empty expression in extended character class + +/(?[/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/(?[]/ +Failed: error 214 at offset 4: empty expression in extended character class + +/(?[\n/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/(?[\n]/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] + +/(?[\n]z)/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] + +/(?[\n] )/ +Failed: error 215 at offset 6: terminating ] with no following closing parenthesis in (?[...] 
+ +/(?[(/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?[( / +Failed: error 106 at offset 5: missing terminating ] for character class + +/(?[(\n/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/(?[ \n + () ])/ +Failed: error 214 at offset 11: empty expression in extended character class + +/(?[1])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[a])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[a-c])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[(])/ +Failed: error 114 at offset 4: missing closing parenthesis + +/(?[(\n])/ +Failed: error 114 at offset 6: missing closing parenthesis + +/(?[\n)])/ +Failed: error 122 at offset 6: unmatched closing parenthesis + +/(?[^\n])/ +Failed: error 209 at offset 4: unexpected operator in extended character class (no preceding operand) + +/(?[ \n \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[ \d \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[ [\n] \t ])/ +Failed: error 213 at offset 11: unexpected expression in extended character class (no preceding operator) + +/(?[ (\n) \t ])/ +Failed: error 213 at offset 11: unexpected expression in extended character class (no preceding operator) + +/(?[ [:alpha:] \t ])/ +Failed: error 213 at offset 16: unexpected expression in extended character class (no preceding operator) + +/(?[ \n + \t \d ])/ +Failed: error 213 at offset 14: unexpected expression in extended character class (no preceding operator) + +/(?[ !\n \t ])/ +Failed: error 213 at offset 10: unexpected expression in extended character class (no preceding operator) + +/(?[ \n [:alpha:] ])/ +Failed: error 213 at offset 16: unexpected expression in extended character class (no preceding 
operator) + +/(?[ \n [\d] ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n (\t) ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n !\t ])/ +Failed: error 213 at offset 8: unexpected expression in extended character class (no preceding operator) + +/(?[ \n \t ])/ +Failed: error 213 at offset 9: unexpected expression in extended character class (no preceding operator) + +/(?[:graph:])/ +Failed: error 216 at offset 4: unexpected character in (?[...]) extended character class + +/(?[\Qn\E])/ +Failed: error 216 at offset 6: unexpected character in (?[...]) extended character class + +# maximum depth tests + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&\n))))))))))))))])/ + \n + 0: \x0a +\= Expect no match + a +No match + b +No match + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+([\n]&\n))))))))))))))])/ +Failed: error 207 at offset 158: extended character class nesting is too deep + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&[\n]))))))))))))))])/ +Failed: error 207 at offset 161: extended character class nesting is too deep + +/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+((\n)&\n))))))))))))))])/ +Failed: error 207 at offset 158: extended character class nesting is too deep + +# -------------- + +/[[:digit:] -Z]/xx +Failed: error 150 at offset 14: invalid range in character class + +/[\d -Z]/xx +Failed: error 150 at offset 7: invalid range in character class + 
+/[[:digit:]\E-H]/ +Failed: error 150 at offset 13: invalid range in character class + +/[[:digit:]\Q\E-H]+/ +Failed: error 150 at offset 15: invalid range in character class + +/[z-[:space:]]/ +Failed: error 150 at offset 12: invalid range in character class + +/[z-\d]/ +Failed: error 150 at offset 5: invalid range in character class + +/[[:space:]-z]/ +Failed: error 150 at offset 11: invalid range in character class + +/[\d-z]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\d-\w]/ +Failed: error 150 at offset 4: invalid range in character class + +/[\Q/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\Q/\ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[\Q\E/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\Q\n/ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[\Q\n]/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[\Q\n/\ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[\Q\n\]/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\n\E/ +Failed: error 106 at offset 7: missing terminating ] for character class + +/[\Q\n\E]/ + \\ + 0: \ + n + 0: n +\= Expect no match + \n +No match + Q +No match + +/[z\Q/ +Failed: error 106 at offset 4: missing terminating ] for character class + +/[z\Q/\ +Failed: error 106 at offset 5: missing terminating ] for character class + +/[z\Q\E/ +Failed: error 106 at offset 6: missing terminating ] for character class + +/[/\ +Failed: error 101 at offset 2: \ at end of pattern + +/[\n/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\E/ +Failed: error 106 at offset 3: missing terminating ] for character class + +/[\^z]/B +------------------------------------------------------------------ + Bra + [\^z] + Ket + End 
+------------------------------------------------------------------ + +/[ \^]/B +------------------------------------------------------------------ + Bra + [ ^] + Ket + End +------------------------------------------------------------------ + +/[\\z]/B +------------------------------------------------------------------ + Bra + [\\z] + Ket + End +------------------------------------------------------------------ + +/[0-z]/B +------------------------------------------------------------------ + Bra + [0-z] + Ket + End +------------------------------------------------------------------ + +/[0\-z]/B +------------------------------------------------------------------ + Bra + [\-0z] + Ket + End +------------------------------------------------------------------ + +/[]z]/B +------------------------------------------------------------------ + Bra + [\]z] + Ket + End +------------------------------------------------------------------ + +/[ \]]/B +------------------------------------------------------------------ + Bra + [ \]] + Ket + End +------------------------------------------------------------------ + +/[ --]/B +------------------------------------------------------------------ + Bra + [ -\-] + Ket + End +------------------------------------------------------------------ + +/[A-\]]/B +------------------------------------------------------------------ + Bra + [A-\]] + Ket + End +------------------------------------------------------------------ + +/[A-\\]/B +------------------------------------------------------------------ + Bra + [A-\\] + Ket + End +------------------------------------------------------------------ + +/[\A]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\Z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\G]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + 
+/[\K]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\g<1>]/ + < + 0: < + g + 0: g +\= Expect no match + \\ +No match + +/[\k<1>]/ + < + 0: < + k + 0: k +\= Expect no match + \\ +No match + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + 0: u + { + 0: { + } + 0: } + \x20 + 0: + 1 + 0: 1 +\= Expect no match + \\ +No match + +/[a\x{e1}]/iB +------------------------------------------------------------------ + Bra + [Aa\xe1] + Ket + End +------------------------------------------------------------------ + a + 0: a + A + 0: A + \x{e1} + 0: \xe1 + +# -------------- + +# Attempt at full coverage of the substitution buffer-management code - not +# just covering each line in each macro, but covering each instantiation of each +# line in those macros. + +# +# CHECKMEMCPY tests +# +# Four conditions for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Additionally some CHECKMEMCPYs have a substitute_replacement_only branch. 
+# + +# pre-start-offset fragment +# no "overflow after previous overflow" condition +/a/ + XYaZ\=offset=2,replace=foo + 1: XYfooZ + XYaZ\=offset=2,replace=[1]foo +Failed: error -48: no more memory + XYaZ\=offset=2,substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 7 code units are needed + XYaZ\=offset=2,substitute_replacement_only,replace=foo + 1: foo + +# pre-match fragment +/a/ + XYaZ\=replace=foo + 1: XYfooZ + XYaZ\=replace=[1]foo +Failed: error -48: no more memory + XYaZ\=substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 7 code units are needed + XXYaZ\=offset=2,substitute_overflow_length,replace=[1]foo +Failed: error -48: no more memory: 8 code units are needed + XYaZ\=substitute_replacement_only,replace=foo + 1: foo + +# empty match bumpalong +/(?<=abc)(|DEF)/g + abcDEFabcZ\=replace=+ + 3: abc++abc+Z + abcDEFabcZ\=replace=[5]+ +Failed: error -48: no more memory + abcDEFabcZ\=substitute_overflow_length,replace=[5]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=replace=[9]+ +Failed: error -48: no more memory + abcDEFabcZ\=substitute_overflow_length,replace=[9]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=substitute_overflow_length,replace=[1]+ +Failed: error -48: no more memory: 11 code units are needed + abcDEFabcZ\=substitute_replacement_only,replace=+ + 3: +++ + +# literal replacement +/a/ + XYaZ\=substitute_literal,replace=$0 + 1: XY$0Z + XYaZ\=substitute_literal,replace=[3]$0 +Failed: error -48: no more memory + XYaZ\=substitute_literal,substitute_overflow_length,replace=[3]$0 +Failed: error -48: no more memory: 6 code units are needed + XYaZ\=substitute_literal,substitute_overflow_length,replace=[1]$0 +Failed: error -48: no more memory: 6 code units are needed + +# a MARK +/(*:pear)apple/ + XappleY\=replace=${*MARK} + 1: XpearY + XappleY\=replace=[3]${*MARK} +Failed: error -48: no more memory + 
XappleY\=substitute_overflow_length,replace=[3]${*MARK} +Failed: error -48: no more memory: 7 code units are needed + XXappleY\=substitute_overflow_length,replace=[1]${*MARK} +Failed: error -48: no more memory: 8 code units are needed + +# a subject fragment +/a(bb)c/ + XabbcY\=replace=$1 + 1: XbbY + XabbcY\=replace=[2]$1 +Failed: error -48: no more memory + XabbcY\=substitute_overflow_length,replace=[2]$1 +Failed: error -48: no more memory: 5 code units are needed + XXabbcY\=substitute_overflow_length,replace=[1]$1 +Failed: error -48: no more memory: 6 code units are needed + +# a zero-length subject fragment +/a()c/ + XacY\=replace=$1 + 1: XY + XacY\=replace=[2]$1 +Failed: error -48: no more memory + XacY\=substitute_overflow_length,replace=[2]$1 +Failed: error -48: no more memory: 3 code units are needed + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\x{48} + 1: XHY + XabcY\=replace=[1]\x{48} +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\x{48} +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\x{48} +Failed: error -48: no more memory: 5 code units are needed + +# a replacement literal character +/abc/ + XabcY\=replace=Z + 1: XZY + XabcY\=replace=[1]Z +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 5 code units are needed + +# a cancelled substitution +# no "overflow after previous overflow" condition +/abc/substitute_skip=1 + XabcY\=replace=Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" + 1: XabcY + XabcY\=replace=[3]Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[3]Z + 1(1) Old 1 4 "abc" New 1 2 "Z SKIPPED" +Failed: error -48: no more memory: 6 code units are needed + 
XabcY\=substitute_replacement_only,replace=Z + 1(1) Old 1 4 "abc" New 0 1 "Z SKIPPED" + 1: + +# the rest of the subject +/abc/ + XabcYY\=replace=Z + 1: XZYY + XabcYY\=replace=[3]Z +Failed: error -48: no more memory + XabcYY\=substitute_overflow_length,replace=[3]Z +Failed: error -48: no more memory: 5 code units are needed + XabcYY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 5 code units are needed + XabcYY\=substitute_replacement_only,replace=Z + 1: Z + +# the trailing NULL +/abc/ + XabcY\=replace=Z + 1: XZY + XabcY\=replace=[3]Z +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[3]Z +Failed: error -48: no more memory: 4 code units are needed + XabcY\=substitute_overflow_length,replace=[1]Z +Failed: error -48: no more memory: 4 code units are needed + +# +# CHECKCASECPY tests +# +# The same four conditions for CHECKCASECPY as for CHECKMEMCPY: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# overflow after previous overflow +# Also the condition where CHECKCASECPY isn't called due to a custom callout +# + +# a MARK +/(*:pear)apple/substitute_extended + XappleY\=replace=\U${*MARK} + 1: XPEARY + XappleY\=replace=[3]\U${*MARK} +Failed: error -48: no more memory + XappleY\=substitute_overflow_length,replace=[3]\U${*MARK} +Failed: error -48: no more memory: 7 code units are needed + XXappleY\=substitute_overflow_length,replace=[1]\U${*MARK} +Failed: error -48: no more memory: 8 code units are needed + XappleY\=substitute_case_callout,replace=\U${*MARK} + 1: XpeBrY + +# a subject fragment +/a(bb)c/substitute_extended + XabbcY\=replace=\U$1 + 1: XBBY + XabbcY\=replace=[2]\U$1 +Failed: error -48: no more memory + XabbcY\=substitute_overflow_length,replace=[2]\U$1 +Failed: error -48: no more memory: 5 code units are needed + XXabbcY\=substitute_overflow_length,replace=[1]\U$1 +Failed: error -48: no more memory: 6 code units are needed + XabbcY\=substitute_case_callout,replace=\U$1 + 
1: XbbY + +# a zero-length subject fragment +/a()c/substitute_extended + XacY\=replace=\U$1 + 1: XY + XacY\=replace=[2]\U$1 +Failed: error -48: no more memory + XacY\=substitute_overflow_length,replace=[2]\U$1 +Failed: error -48: no more memory: 3 code units are needed + +# a data character via an escape +/abc/substitute_extended + XabcY\=replace=\U\x{48} + 1: XHY + XabcY\=replace=[1]\U\x{48} +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\U\x{48} +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\U\x{48} +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_case_callout,replace=\U\x{48} + 1: XHY + +# a replacement literal character +/abc/substitute_extended + XabcY\=replace=\UZ + 1: XZY + XabcY\=replace=[1]\UZ +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[1]\UZ +Failed: error -48: no more memory: 4 code units are needed + XXabcY\=substitute_overflow_length,replace=[1]\UZ +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_case_callout,replace=\UZ + 1: XZY + +# +# DELAYEDFORCECASE tests +# +# Some different triggering conditions for DELAYEDFORCECASE: +# no overflow; +# first overflow (with/without substitute_overflow_length); +# if there was a previous overflow, then the case callout can't be invoked +# Also, the CASEERROR branch. +# Also, the branch for where chars_outstanding is zero, both with and without +# a previous overflow. 
+# + +# on set casing mode +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=\Uf\Lq + 1: XSSqY + XabcY\=replace=[2]\Uf\Lq +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[2]\Uf\Lq +Failed: error -48: no more memory: 16 code units are needed + XabcY\=substitute_overflow_length,replace=[1]\Uf\Lq +Failed: error -48: no more memory: 25 code units are needed + XabcY\=replace=\U!\Lq +Failed: error -69: error performing replacement case transformation + XabcY\=replace=\U\Lq + 1: XqY + XXabcY\=substitute_overflow_length,replace=[1]\U\Lq +Failed: error -48: no more memory: 15 code units are needed + +# trailing fragment +/abc/substitute_extended,substitute_case_callout + XabcY\=replace=f + 1: XfY + XabcY\=replace=\Uf + 1: XSSY + XabcY\=replace=[2]\Uf +Failed: error -48: no more memory + XabcY\=substitute_overflow_length,replace=[2]\Uf +Failed: error -48: no more memory: 5 code units are needed + XabcY\=substitute_overflow_length,replace=[1]\Uf +Failed: error -48: no more memory: 14 code units are needed + XabcY\=replace=\U! 
+Failed: error -69: error performing replacement case transformation + XabcY\=replace=\U + 1: XY + XXabcY\=substitute_overflow_length,replace=[1]\U +Failed: error -48: no more memory: 4 code units are needed + +# +# do_case_copy tests +# + +/aa/i,substitute_extended + XaaY\=replace=\Uaa\uaa\LAA\lAA\U\lAA\L\uaa\u\LaaA\l\UAAa + 1: XAAAaaaaAaAAaAaaaAAY + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory + XaaY\=replace=[5]\uaa + 1: XAaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[5]\u$0 + 1: XAaY + XaaY\=replace=[1]\lAA +Failed: error -48: no more memory + XaaY\=replace=[2]\lAA +Failed: error -48: no more memory + XaaY\=replace=[3]\lAA +Failed: error -48: no more memory + XaaY\=replace=[4]\lAA +Failed: error -48: no more memory + XaaY\=replace=[5]\lAA + 1: XaAY + XAAY\=replace=[1]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[2]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[3]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[4]\l$0 +Failed: error -48: no more memory + XAAY\=replace=[5]\l$0 + 1: XaAY + XaaY\=replace=[1]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[2]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[3]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UAa +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UAa + 1: XaAY + XAaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory + XAaY\=replace=[5]\l\U$0 + 1: XaAY + 
XaaY\=replace=[1]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[2]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[3]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[4]\u\LaA +Failed: error -48: no more memory + XaaY\=replace=[5]\u\LaA + 1: XAaY + XaAY\=replace=[1]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[2]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[3]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[4]\u\L$0 +Failed: error -48: no more memory + XaAY\=replace=[5]\u\L$0 + 1: XAaY + +/aa/i,substitute_extended,substitute_overflow_length + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\uaa + 1: XAaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u$0 + 1: XAaY + XaaY\=replace=[1]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\lAA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\lAA + 1: XaAY + XAAY\=replace=[1]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[2]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[3]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[4]\l$0 
+Failed: error -48: no more memory: 5 code units are needed + XAAY\=replace=[5]\l$0 + 1: XaAY + XaaY\=replace=[1]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\l\UAa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\l\UAa + 1: XaAY + XAaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XAaY\=replace=[5]\l\U$0 + 1: XaAY + XaaY\=replace=[1]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[2]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[3]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u\LaA +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u\LaA + 1: XAaY + XaAY\=replace=[1]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[2]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[3]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[4]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaAY\=replace=[5]\u\L$0 + 1: XAaY + +/aa/i,substitute_extended,substitute_case_callout + XaaY\=replace=\Uaa\uaa\LBB\lBB\U\lBB\L\uaa\u\LaaB\l\UBBa + 1: XBBBaaaaBaBBaBaaaBBY + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory + 
XaaY\=replace=[5]\uaa + 1: XBaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory + XaaY\=replace=[5]\u$0 + 1: XBaY + XaaY\=replace=[1]\lBB +Failed: error -48: no more memory + XaaY\=replace=[2]\lBB +Failed: error -48: no more memory + XaaY\=replace=[3]\lBB +Failed: error -48: no more memory + XaaY\=replace=[4]\lBB +Failed: error -48: no more memory + XaaY\=replace=[5]\lBB + 1: XaBY + XBBY\=replace=[1]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[2]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[3]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[4]\l$0 +Failed: error -48: no more memory + XBBY\=replace=[5]\l$0 + 0: XBBY + XaaY\=replace=[1]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[2]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[3]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UBa +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UBa + 1: XaBY + XBaY\=replace=[1]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory + XBaY\=replace=[5]\l\U$0 + 0: XBaY + XaaY\=replace=[1]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[2]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[3]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[4]\u\LaB +Failed: error -48: no more memory + XaaY\=replace=[5]\u\LaB + 1: XBaY + XaBY\=replace=[1]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[2]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[3]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[4]\u\L$0 +Failed: error -48: no more memory + XaBY\=replace=[5]\u\L$0 + 0: XaBY + 
+/aa/i,substitute_extended,substitute_case_callout,substitute_overflow_length + XaaY\=replace=[1]\uaa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\uaa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\uaa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\uaa + 1: XBaY + XaaY\=replace=[1]\u$0 +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\u$0 +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u$0 +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u$0 + 1: XBaY + XaaY\=replace=[1]\lBB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\lBB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\lBB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\lBB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\lBB + 1: XaBY + XBBY\=replace=[1]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[2]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[3]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[4]\l$0 +Failed: error -48: no more memory: 5 code units are needed + XBBY\=replace=[5]\l$0 + 0: XBBY + XaaY\=replace=[1]\l\UBa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\l\UBa +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\l\UBa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\l\UBa +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\l\UBa + 1: XaBY + XBaY\=replace=[1]\l\U$0 +Failed: error -48: no more 
memory: 5 code units are needed + XBaY\=replace=[2]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[3]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[4]\l\U$0 +Failed: error -48: no more memory: 5 code units are needed + XBaY\=replace=[5]\l\U$0 + 0: XBaY + XaaY\=replace=[1]\u\LaB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[2]\u\LaB +Failed: error -48: no more memory: 15 code units are needed + XaaY\=replace=[3]\u\LaB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[4]\u\LaB +Failed: error -48: no more memory: 5 code units are needed + XaaY\=replace=[5]\u\LaB + 1: XBaY + XaBY\=replace=[1]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[2]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[3]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[4]\u\L$0 +Failed: error -48: no more memory: 5 code units are needed + XaBY\=replace=[5]\u\L$0 + 0: XaBY + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l\U!a +Failed: error -69: error performing replacement case transformation + XaaY\=replace=\l\Ua! 
+Failed: error -69: error performing replacement case transformation + XaaY\=replace=\ufa + 1: XSSaY + XaaY\=replace=[3]\ufa +Failed: error -48: no more memory + XaaY\=replace=\l\Uaoo + 1: XaOOOOY + XaaY\=replace=[4]\l\Uaoo +Failed: error -48: no more memory + XaaY\=replace=\l\UPa + 1: XppBY + XaaY\=replace=[3]\l\UPa +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UPa +Failed: error -48: no more memory + XaaY\=replace=\l\UPo + 1: XppOOY + XaaY\=replace=[3]\l\UPo +Failed: error -48: no more memory + XaaY\=replace=[4]\l\UPo +Failed: error -48: no more memory + XaaY\=replace=\l\UPpp + 1: XppPY + XaaY\=replace=[4]\l\UPpp +Failed: error -48: no more memory + XaaY\=replace=[5]\l\UPpp +Failed: error -48: no more memory + +# +# special test-callback case transformation tests +# + +/aa/substitute_extended,substitute_case_callout + XaaY\=replace=\l! +Failed: error -69: error performing replacement case transformation + XaaY\=replace=\ua\lB + 1: XBaY + XaaY\=replace=\LdDZ\UdDZ\ud\uD\uZ + 1: XdddZZZDDDY + XaaY\=replace=\uf\Uf\Lf\Us\Ls\uS\lS + 1: XSSSSfSsSsY + XaaY\=replace=\LOO\LOQ\UOO\uo\lo + 1: XoOQOOOOoY + XaaY\=replace=\upq\upp\lpp\Upp\Lpp\lP\uP + 1: XpqppppPppppPY + XaaY\=replace=\ll\ul\Ul\LMmNn\UMmNn + 1: XlMnMNmmnnMMNNY + XaaY\=replace=\Uac\Uaca\Uak\Uaka\Lck\LBK\LBKB\LBK \UK + 1: XBKBKBBKBKBckacakaac KY + Xaay\=replace=\u\Lqj\u\Lij\u\LIj\u\LiJ\u\LIJ\u\Liq\u\Lij\Uij\UiIjJ\LiIjJ + 1: XqjIJIJIJIJIqIJIJIIJJiijjy + Xaay\=replace=\Uaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + 1: XBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBy + +# -------------- + +# End of testinput2 +Error -80: PCRE2_ERROR_BADDATA (unknown error number) +Error -62: bad serialized data +Error -2: partial match +Error -1: no match +Error 0: PCRE2_ERROR_BADDATA (unknown error number) +Error 100: no error +Error 101: \ at end of pattern +Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode +Error 300: PCRE2_ERROR_BADDATA (unknown error number) --- pcre2-10.45.orig/.pc/applied-patches +++ 
pcre2-10.45/.pc/applied-patches @@ -0,0 +1 @@ +CVE-2025-58050.patch --- pcre2-10.45.orig/debian/README.Debian +++ pcre2-10.45/debian/README.Debian @@ -0,0 +1,12 @@ + PCRE2 for Debian + ---------------- + +This is PCRE2, the newer version of the PCRE (perl-compatible regular +expression) library. New projects should use this library in +preference to the older PCRE library (which is, for historical reasons, +called pcre3 in Debian). + +The names of functions in libpcre2-posix are prefixed with PCRE2_, so +they don't clash with the names in libc. + + -- Matthew Vernon , Sun, 22 Nov 2015 16:07:27 +0000 --- pcre2-10.45.orig/debian/README.source +++ pcre2-10.45/debian/README.source @@ -0,0 +1,17 @@ +The Debian packaging of pcre2 is maintained in git, using the merging +workflow described in dgit-maint-merge(7). There isn't a patch +queue that can be represented as a quilt series. + +A detailed breakdown of the changes is available from their +canonical representation - git commits in the packaging repository. +For example, to see the changes made by the Debian maintainer in +the first upload of upstream version 1.2.3, you could use: + + % git clone https://git.dgit.debian.org/pcre2 + % cd pcre2 + % git log --oneline 1.2.3..debian/1.2.3-1 -- . ':!debian' + +(If you have dgit, use `dgit clone pcre2`, rather than plain `git +clone`.) + + -- Matthew Vernon , Thu, 30 Nov 2017 14:16:33 +0000 --- pcre2-10.45.orig/debian/changelog +++ pcre2-10.45/debian/changelog @@ -0,0 +1,330 @@ +pcre2 (10.45-1ubuntu0.1) plucky-security; urgency=medium + + * SECURITY UPDATE: heap overflow in scan substring + - debian/patches/CVE-2025-58050.patch: restore buffer after an ACCEPT + inside a scan substring block in src/pcre2_match.c, + testdata/testinput2, testdata/testoutput2. 
+ - CVE-2025-58050 + + -- Marc Deslauriers Fri, 12 Sep 2025 10:30:21 -0400 + +pcre2 (10.45-1) unstable; urgency=medium + + * New upstream release + + -- Matthew Vernon Wed, 05 Feb 2025 09:25:16 +0000 + +pcre2 (10.45~rc1-1) experimental; urgency=medium + + * Upstream RC1 for 10.45 version + + -- Matthew Vernon Thu, 23 Jan 2025 09:49:13 +0000 + +pcre2 (10.44-5) unstable; urgency=medium + + * Enable JIT on loong64 (Closes: #1089230) + + -- Matthew Vernon Sun, 08 Dec 2024 13:05:09 +0000 + +pcre2 (10.44-4) unstable; urgency=medium + + * Disable JIT on armel (it requires v7) (Closes: 1087563) + + -- Matthew Vernon Fri, 15 Nov 2024 15:53:09 +0000 + +pcre2 (10.44-3) unstable; urgency=medium + + * Use dh_autoreconf to fix FTBFS on mips64el (Closes: #1087562) + * Cherry-pick upstream commit to fix 32-bit issue (Closes: #1087564) + + -- Matthew Vernon Fri, 15 Nov 2024 14:47:09 +0000 + +pcre2 (10.44-2) unstable; urgency=medium + + * Upload to unstable (Closes: #1086187) + * Update homepage URL (Closes: #1073087) + * Mention libre2-dev in libpcre2-dev description (Closes: #1082901) + + -- Matthew Vernon Thu, 14 Nov 2024 11:40:06 +0000 + +pcre2 (10.44-1) experimental; urgency=medium + + * New upstream version + + -- Matthew Vernon Fri, 08 Nov 2024 16:27:46 +0000 + +pcre2 (10.43-1) experimental; urgency=medium + + * New upstream version + + -- Matthew Vernon Sat, 17 Feb 2024 10:22:28 +0000 + +pcre2 (10.42-4) unstable; urgency=medium + + * Build with JIT support on riscv64 (Closes: #1050819) + + -- Matthew Vernon Tue, 29 Aug 2023 18:23:44 +0100 + +pcre2 (10.42-3) unstable; urgency=medium + + * Specify source format 1.0 explicitly (Closes: #1049910) + + -- Matthew Vernon Mon, 21 Aug 2023 11:44:09 +0100 + +pcre2 (10.42-2) unstable; urgency=medium + + * disable JIT on sparc (Closes: #1034779) + + -- Matthew Vernon Tue, 11 Jul 2023 23:11:09 +0100 + +pcre2 (10.42-1) unstable; urgency=medium + + * New upstream version + + -- Matthew Vernon Sun, 01 Jan 2023 15:44:06 +0000 + +pcre2 
(10.40-3) unstable; urgency=medium + + * Support the noudeb build profile (Helmut Grohne). (Closes: #1024941) + + -- Matthew Vernon Fri, 02 Dec 2022 17:40:13 +0000 + +pcre2 (10.40-2) unstable; urgency=low + + * Move to machine-readable copyright file (Bastian Germann) + * Remove filenamemangle from debian/watch (Bastian Germann) + + -- Matthew Vernon Sun, 23 Oct 2022 16:07:18 +0100 + +pcre2 (10.40-1) unstable; urgency=medium + + * New upstream release + * Update watch file to note new upstream repo location + + -- Matthew Vernon Mon, 25 Apr 2022 17:10:36 +0100 + +pcre2 (10.39-4) unstable; urgency=medium + + * Backport upstream Hurd build fix (Closes: #1009066) + + -- Matthew Vernon Mon, 11 Apr 2022 09:23:48 +0100 + +pcre2 (10.39-3) unstable; urgency=medium + + * Enable JIT on s390x (Closes: #999836) + * Re-enable JIT on MIPS (Closes: #892488) + + -- Matthew Vernon Wed, 17 Nov 2021 14:52:42 +0000 + +pcre2 (10.39-2) unstable; urgency=medium + + * Update long descriptions of runtime packages (Closes: #978013) + * Source-only upload (-1 was binary to get through NEW) + * Depend on libc-dev rather than libc6-dev (Closes: #989729) + + -- Matthew Vernon Mon, 08 Nov 2021 23:47:21 +0000 + +pcre2 (10.39-1) unstable; urgency=medium + + * New upstream release + * Update watch file to reflect new upstream home on github + * libpcre2-posix soname updated + + -- Matthew Vernon Sat, 06 Nov 2021 13:56:27 +0000 + +pcre2 (10.36-2) unstable; urgency=medium + + * Upload to unstable + + -- Matthew Vernon Sun, 13 Dec 2020 16:23:03 +0000 + +pcre2 (10.36-1) experimental; urgency=medium + + * New upstream version + * Fix watch file + + -- Matthew Vernon Mon, 07 Dec 2020 12:35:19 +0000 + +pcre2 (10.35-2) unstable; urgency=medium + + * Upload to unstable + + -- Matthew Vernon Fri, 04 Dec 2020 09:52:04 +0000 + +pcre2 (10.35-1) experimental; urgency=medium + + * New upstream version + * Bump standards-version to 4.5.1 + + -- Matthew Vernon Fri, 27 Nov 2020 09:25:08 +0000 + +pcre2 (10.34-7) 
unstable; urgency=medium + + * Upstream patch to fix segfault in rspamd (Closes: #946221) + + -- Matthew Vernon Sat, 07 Dec 2019 17:06:59 +0000 + +pcre2 (10.34-6) unstable; urgency=high + + * Replace: and Conflict: the previous libpcre2-posix0 package (which + erroneously had the soname 2 library in) (Closes: #946279, #946290, #946311) + * Can now make a source-only upload now libpcre2-posix2 has made it + through NEW. We need that so we can migrate to testing (Closes: #946275) + + -- Matthew Vernon Sat, 07 Dec 2019 14:34:25 +0000 + +pcre2 (10.34-5) unstable; urgency=medium + + * Sacrifice an integer so I can do a binary upload for the new + libpcre2-posix package name :-( + + -- Matthew Vernon Tue, 03 Dec 2019 21:14:29 +0000 + +pcre2 (10.34-4) unstable; urgency=high + + * Add symbols files (Closes: #945973) + * Update libpcre2-posix name to match soname + + -- Matthew Vernon Tue, 03 Dec 2019 19:20:54 +0000 + +pcre2 (10.34-3) unstable; urgency=medium + + * Remove -dbg package from debian/control as well + + -- Matthew Vernon Mon, 02 Dec 2019 11:58:33 +0000 + +pcre2 (10.34-2) unstable; urgency=high + + * Try an upstream patch to fix an ARM bug (apropos 945972) + * Remove -dbg in favour of -dbgsyms, thanks to Michael Biebl + for the patch (Closes: #891624) + + -- Matthew Vernon Mon, 02 Dec 2019 11:22:32 +0000 + +pcre2 (10.34-1) unstable; urgency=medium + + * New upstream release + + -- Matthew Vernon Thu, 28 Nov 2019 16:28:31 +0000 + +pcre2 (10.32-5) unstable; urgency=high + + * Patch from Guillem Jover to only use SSE2 + instructions on those i386 CPUs that support them (Closes: #925360) + + -- Matthew Vernon Mon, 25 Mar 2019 19:56:19 +0000 + +pcre2 (10.32-4) unstable; urgency=medium + + * Take patch from Jeremy Bicha to build with + --disable-pcre2grep-callout: A grep tool should not be expected to + execute commands (Closes: #920273) (LP: #1636666) + + -- Matthew Vernon Thu, 24 Jan 2019 09:20:51 +0000 + +pcre2 (10.32-3) unstable; urgency=medium + + * Sacrifice an 
integer to dgit (previous upload didn't work) + + -- Matthew Vernon Sun, 28 Oct 2018 14:19:04 +0000 + +pcre2 (10.32-2) unstable; urgency=medium + + * Take Ondřej Nový's patch to remove redundant Priority field in -dbg + package + + -- Matthew Vernon Fri, 26 Oct 2018 20:01:53 +0100 + +pcre2 (10.32-1) unstable; urgency=medium + + * New upstream release + + -- Matthew Vernon Fri, 26 Oct 2018 19:27:13 +0100 + +pcre2 (10.31-3) unstable; urgency=medium + + * Disable JIT on mips* (apropos #892488) + + -- Matthew Vernon Sat, 10 Mar 2018 00:31:33 +0000 + +pcre2 (10.31-2) unstable; urgency=low + + * Point to salsa repo in vcs* fields + * priority extra -> optional + * Add homepage link in debian/control + * Update to newer standards-version + + -- Matthew Vernon Sat, 24 Feb 2018 14:57:26 +0000 + +pcre2 (10.31-1) unstable; urgency=medium + + * New upstream version (Closes: #883224) + + -- Matthew Vernon Sat, 24 Feb 2018 12:13:02 +0000 + +pcre2 (10.22-6) unstable; urgency=medium + + [ Cyril Brulebois ] + * Add a libpcre2-8-0-udeb, needed by vte2.91 (Closes: #887674) + * Enable parallel building. 
+ + -- Matthew Vernon Sat, 03 Feb 2018 18:44:08 +0000 + +pcre2 (10.22-5) unstable; urgency=medium + + * Add Vcs-{Git,Browser} fields to debian/control + + -- Matthew Vernon Fri, 01 Dec 2017 17:35:06 +0000 + +pcre2 (10.22-4) unstable; urgency=low + + * Add README.source explaining packaging workflow (Closes: #862425) + + -- Matthew Vernon Thu, 30 Nov 2017 14:17:39 +0000 + +pcre2 (10.22-3) unstable; urgency=medium + + * CVE-2017-7186: invalid Unicode property lookup may cause denial of + service (Closes: #858233) + + -- Matthew Vernon Tue, 21 Mar 2017 22:33:58 +0000 + +pcre2 (10.22-2) unstable; urgency=low + + * re-upload with source :-( + + -- Matthew Vernon Tue, 02 Aug 2016 19:27:58 +0100 + +pcre2 (10.22-1) unstable; urgency=low + + * New upstream release + + -- Matthew Vernon Tue, 02 Aug 2016 18:59:19 +0100 + +pcre2 (10.21-1) unstable; urgency=low + + * New upstream version + * Upstream patch to fix workspace overflow for (*ACCEPT) with deeply + nested parentheses (Closes: #815920) + + -- Matthew Vernon Sat, 27 Feb 2016 15:21:34 +0000 + +pcre2 (10.20-3) unstable; urgency=low + + * Enable JIT on ppc64 & ppc64el (Closes: #806388) + + -- Matthew Vernon Fri, 27 Nov 2015 08:16:38 +0000 + +pcre2 (10.20-2) unstable; urgency=low + + * remove unnecessary -L settings (Closes: #805941) + * Improve makefile syntax, and add JIT on arm64 + + -- Matthew Vernon Tue, 24 Nov 2015 20:39:15 +0000 + +pcre2 (10.20-1) unstable; urgency=low + + * Initial debian version (Closes: #805728) + + -- Matthew Vernon Sun, 22 Nov 2015 15:58:38 +0000 + --- pcre2-10.45.orig/debian/compat +++ pcre2-10.45/debian/compat @@ -0,0 +1 @@ +9 --- pcre2-10.45.orig/debian/control +++ pcre2-10.45/debian/control @@ -0,0 +1,122 @@ +Source: pcre2 +Section: libs +Priority: optional +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Matthew Vernon +Standards-Version: 4.5.1 +Build-Depends: debhelper (>=9), dpkg-dev (>= 1.16.1~), dh-autoreconf +Vcs-Git: https://salsa.debian.org/debian/pcre2.git +Vcs-Browser: 
https://salsa.debian.org/debian/pcre2 +Homepage: https://github.com/PCRE2Project/pcre2 + +Package: libpcre2-8-0 +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Description: New Perl Compatible Regular Expression Library - 8 bit runtime files + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + This package contains the 8 bit runtime library, which operates on + ASCII and UTF-8 input. + +Package: libpcre2-8-0-udeb +Package-Type: udeb +Build-Profiles: <!noudeb> +Section: debian-installer +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: New Perl Compatible Regular Expression Library - 8 bit runtime files (udeb) + This package contains the 8 bit runtime library, for the Debian Installer. + +Package: libpcre2-16-0 +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Description: New Perl Compatible Regular Expression Library - 16 bit runtime files + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + This package contains the 16 bit runtime library, which operates on + UTF-16 input. 
+ +Package: libpcre2-32-0 +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Description: New Perl Compatible Regular Expression Library - 32 bit runtime files + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + This package contains the 32 bit runtime library, which operates on + UTF-32 input. + +Package: libpcre2-posix3 +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Replaces: libpcre2-posix0 +Conflicts: libpcre2-posix0 +Description: New Perl Compatible Regular Expression Library - posix-compatible runtime files + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + This package contains the runtime library for the posix-compatible API. + +Package: libpcre2-dev +Section: libdevel +Architecture: any +Multi-Arch: same +Conflicts: libpcre3-dev (<<2:8.35-8) +Depends: libc-dev, libpcre2-8-0 (= ${binary:Version}), libpcre2-16-0 (= ${binary:Version}), libpcre2-32-0 (= ${binary:Version}), libpcre2-posix3 (= ${binary:Version}), ${misc:Depends} +Description: New Perl Compatible Regular Expression Library - development files + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. 
New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + Unlike the previous version of PCRE, there is no C++ library included + with PCRE2. Users who wish to avoid porting their C++ code to use the + PCRE2 C API might want to consider the libre2-dev package in Debian + which attempts to provide a similar C++ API to that provided by + libpcrecpp in previous versions of PCRE. + . + This package contains the development files, including headers, + static libraries, and documentation. + +Package: pcre2-utils +Section: utils +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: New Perl Compatible Regular Expression Library - utilities + This is PCRE2, the new implementation of PCRE, a library of functions + to support regular expressions whose syntax and semantics are as + close as possible to those of the Perl 5 language. New projects + should use this library in preference to the older library, + confusingly called pcre3 in Debian. + . + This package contains the utilities pcre2grep (like grep with PCRE) + and pcre2test (a test program for the library, but also useful for + experimenting with regular expressions). Both programs are also + useful examples of programming with libpcre2. --- pcre2-10.45.orig/debian/copyright +++ pcre2-10.45/debian/copyright @@ -0,0 +1,122 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Comment: This package was debianized by Matthew Vernon +Source: https://github.com/PCRE2Project/pcre2/releases + +Files: * +Copyright: 1997-2022 University of Cambridge. +License: BSD-3-clause-Cambridge with BINARY LIBRARY-LIKE PACKAGES exception + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + . + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + . 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + . + * Neither the name of the University of Cambridge nor the names of any + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + . + . + EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES + ------------------------------------------ + . + The second condition in the BSD licence (covering binary redistributions) does + not apply all the way down a chain of software. If binary package A includes + PCRE2, it must respect the condition, but if package B is software that + includes package A, the condition is not imposed on package B unless it uses + PCRE2 independently. + +Files: cmake/Find* +Copyright: KDE Community +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + . + 1. Redistributions of source code must retain the copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Files: install-sh +Copyright: (C) 1994 X Consortium +License: X11 + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- + TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + . + Except as contained in this notice, the name of the X Consortium shall not + be used in advertising or otherwise to promote the sale, use or other deal- + ings in this Software without prior written authorization from the X Consor- + tium. +Comment: FSF changes to this file are in the public domain. + +Files: deps/sljit/* +Copyright: Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. +License: BSD-2-clause + Redistribution and use in source and binary forms, with or without modification, are + permitted provided that the following conditions are met: + . + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + . + 2. Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Files: testdata/* +Copyright: None (public-domain) +License: public-domain + The data in the testdata directory is not copyrighted and is in the public domain. --- pcre2-10.45.orig/debian/gbp.conf +++ pcre2-10.45/debian/gbp.conf @@ -0,0 +1,8 @@ +[DEFAULT] +upstream-branch = upstream +debian-branch = master +upstream-tag = %(version)s + +sign-tags = False +pristine-tar = False +pristine-tar-commit = False --- pcre2-10.45.orig/debian/libpcre2-16-0.install +++ pcre2-10.45/debian/libpcre2-16-0.install @@ -0,0 +1 @@ +debian/tmp/usr/lib/*/libpcre2-16.so.* --- pcre2-10.45.orig/debian/libpcre2-16-0.symbols +++ pcre2-10.45/debian/libpcre2-16-0.symbols @@ -0,0 +1,79 @@ +libpcre2-16.so.0 libpcre2-16-0 #MINVER# + pcre2_callout_enumerate_16@Base 10.22 + pcre2_code_copy_16@Base 10.22 + pcre2_code_copy_with_tables_16@Base 10.32 + pcre2_code_free_16@Base 10.22 + pcre2_compile_16@Base 10.22 + pcre2_compile_context_copy_16@Base 10.22 + pcre2_compile_context_create_16@Base 10.22 + pcre2_compile_context_free_16@Base 10.22 + pcre2_config_16@Base 10.22 + pcre2_convert_context_copy_16@Base 10.32 + pcre2_convert_context_create_16@Base 10.32 + pcre2_convert_context_free_16@Base 10.32 + pcre2_converted_pattern_free_16@Base 10.32 + pcre2_dfa_match_16@Base 10.22 + pcre2_general_context_copy_16@Base 10.22 + pcre2_general_context_create_16@Base 10.22 + pcre2_general_context_free_16@Base 10.22 + pcre2_get_error_message_16@Base 10.22 + pcre2_get_mark_16@Base 10.22 + pcre2_get_match_data_heapframes_size_16@Base 10.43 + pcre2_get_match_data_size_16@Base 10.34 + pcre2_get_ovector_count_16@Base 10.22 + pcre2_get_ovector_pointer_16@Base 10.22 + pcre2_get_startchar_16@Base 10.22 + pcre2_jit_compile_16@Base 10.22 + pcre2_jit_free_unused_memory_16@Base 10.22 + pcre2_jit_match_16@Base 10.22 + pcre2_jit_stack_assign_16@Base 10.22 + pcre2_jit_stack_create_16@Base 10.22 + pcre2_jit_stack_free_16@Base 10.22 + pcre2_maketables_16@Base 10.22 + pcre2_maketables_free_16@Base 10.34 + pcre2_match_16@Base 10.22 + 
pcre2_match_context_copy_16@Base 10.22 + pcre2_match_context_create_16@Base 10.22 + pcre2_match_context_free_16@Base 10.22 + pcre2_match_data_create_16@Base 10.22 + pcre2_match_data_create_from_pattern_16@Base 10.22 + pcre2_match_data_free_16@Base 10.22 + pcre2_pattern_convert_16@Base 10.32 + pcre2_pattern_info_16@Base 10.22 + pcre2_serialize_decode_16@Base 10.22 + pcre2_serialize_encode_16@Base 10.22 + pcre2_serialize_free_16@Base 10.22 + pcre2_serialize_get_number_of_codes_16@Base 10.22 + pcre2_set_bsr_16@Base 10.22 + pcre2_set_callout_16@Base 10.22 + pcre2_set_character_tables_16@Base 10.22 + pcre2_set_compile_extra_options_16@Base 10.32 + pcre2_set_compile_recursion_guard_16@Base 10.22 + pcre2_set_depth_limit_16@Base 10.32 + pcre2_set_glob_escape_16@Base 10.32 + pcre2_set_glob_separator_16@Base 10.32 + pcre2_set_heap_limit_16@Base 10.32 + pcre2_set_match_limit_16@Base 10.22 + pcre2_set_max_pattern_compiled_length_16@Base 10.44 + pcre2_set_max_pattern_length_16@Base 10.22 + pcre2_set_max_varlookbehind_16@Base 10.43 + pcre2_set_newline_16@Base 10.22 + pcre2_set_offset_limit_16@Base 10.22 + pcre2_set_optimize_16@Base 10.45~rc1 + pcre2_set_parens_nest_limit_16@Base 10.22 + pcre2_set_recursion_limit_16@Base 10.22 + pcre2_set_recursion_memory_management_16@Base 10.22 + pcre2_set_substitute_callout_16@Base 10.34 + pcre2_set_substitute_case_callout_16@Base 10.45~rc1 + pcre2_substitute_16@Base 10.22 + pcre2_substring_copy_byname_16@Base 10.22 + pcre2_substring_copy_bynumber_16@Base 10.22 + pcre2_substring_free_16@Base 10.22 + pcre2_substring_get_byname_16@Base 10.22 + pcre2_substring_get_bynumber_16@Base 10.22 + pcre2_substring_length_byname_16@Base 10.22 + pcre2_substring_length_bynumber_16@Base 10.22 + pcre2_substring_list_free_16@Base 10.22 + pcre2_substring_list_get_16@Base 10.22 + pcre2_substring_nametable_scan_16@Base 10.22 + pcre2_substring_number_from_name_16@Base 10.22 --- pcre2-10.45.orig/debian/libpcre2-32-0.install +++ 
pcre2-10.45/debian/libpcre2-32-0.install @@ -0,0 +1 @@ +debian/tmp/usr/lib/*/libpcre2-32.so.* --- pcre2-10.45.orig/debian/libpcre2-32-0.symbols +++ pcre2-10.45/debian/libpcre2-32-0.symbols @@ -0,0 +1,79 @@ +libpcre2-32.so.0 libpcre2-32-0 #MINVER# + pcre2_callout_enumerate_32@Base 10.22 + pcre2_code_copy_32@Base 10.22 + pcre2_code_copy_with_tables_32@Base 10.32 + pcre2_code_free_32@Base 10.22 + pcre2_compile_32@Base 10.22 + pcre2_compile_context_copy_32@Base 10.22 + pcre2_compile_context_create_32@Base 10.22 + pcre2_compile_context_free_32@Base 10.22 + pcre2_config_32@Base 10.22 + pcre2_convert_context_copy_32@Base 10.32 + pcre2_convert_context_create_32@Base 10.32 + pcre2_convert_context_free_32@Base 10.32 + pcre2_converted_pattern_free_32@Base 10.32 + pcre2_dfa_match_32@Base 10.22 + pcre2_general_context_copy_32@Base 10.22 + pcre2_general_context_create_32@Base 10.22 + pcre2_general_context_free_32@Base 10.22 + pcre2_get_error_message_32@Base 10.22 + pcre2_get_mark_32@Base 10.22 + pcre2_get_match_data_heapframes_size_32@Base 10.43 + pcre2_get_match_data_size_32@Base 10.34 + pcre2_get_ovector_count_32@Base 10.22 + pcre2_get_ovector_pointer_32@Base 10.22 + pcre2_get_startchar_32@Base 10.22 + pcre2_jit_compile_32@Base 10.22 + pcre2_jit_free_unused_memory_32@Base 10.22 + pcre2_jit_match_32@Base 10.22 + pcre2_jit_stack_assign_32@Base 10.22 + pcre2_jit_stack_create_32@Base 10.22 + pcre2_jit_stack_free_32@Base 10.22 + pcre2_maketables_32@Base 10.22 + pcre2_maketables_free_32@Base 10.34 + pcre2_match_32@Base 10.22 + pcre2_match_context_copy_32@Base 10.22 + pcre2_match_context_create_32@Base 10.22 + pcre2_match_context_free_32@Base 10.22 + pcre2_match_data_create_32@Base 10.22 + pcre2_match_data_create_from_pattern_32@Base 10.22 + pcre2_match_data_free_32@Base 10.22 + pcre2_pattern_convert_32@Base 10.32 + pcre2_pattern_info_32@Base 10.22 + pcre2_serialize_decode_32@Base 10.22 + pcre2_serialize_encode_32@Base 10.22 + pcre2_serialize_free_32@Base 10.22 + 
pcre2_serialize_get_number_of_codes_32@Base 10.22 + pcre2_set_bsr_32@Base 10.22 + pcre2_set_callout_32@Base 10.22 + pcre2_set_character_tables_32@Base 10.22 + pcre2_set_compile_extra_options_32@Base 10.32 + pcre2_set_compile_recursion_guard_32@Base 10.22 + pcre2_set_depth_limit_32@Base 10.32 + pcre2_set_glob_escape_32@Base 10.32 + pcre2_set_glob_separator_32@Base 10.32 + pcre2_set_heap_limit_32@Base 10.32 + pcre2_set_match_limit_32@Base 10.22 + pcre2_set_max_pattern_compiled_length_32@Base 10.44 + pcre2_set_max_pattern_length_32@Base 10.22 + pcre2_set_max_varlookbehind_32@Base 10.43 + pcre2_set_newline_32@Base 10.22 + pcre2_set_offset_limit_32@Base 10.22 + pcre2_set_optimize_32@Base 10.45~rc1 + pcre2_set_parens_nest_limit_32@Base 10.22 + pcre2_set_recursion_limit_32@Base 10.22 + pcre2_set_recursion_memory_management_32@Base 10.22 + pcre2_set_substitute_callout_32@Base 10.34 + pcre2_set_substitute_case_callout_32@Base 10.45~rc1 + pcre2_substitute_32@Base 10.22 + pcre2_substring_copy_byname_32@Base 10.22 + pcre2_substring_copy_bynumber_32@Base 10.22 + pcre2_substring_free_32@Base 10.22 + pcre2_substring_get_byname_32@Base 10.22 + pcre2_substring_get_bynumber_32@Base 10.22 + pcre2_substring_length_byname_32@Base 10.22 + pcre2_substring_length_bynumber_32@Base 10.22 + pcre2_substring_list_free_32@Base 10.22 + pcre2_substring_list_get_32@Base 10.22 + pcre2_substring_nametable_scan_32@Base 10.22 + pcre2_substring_number_from_name_32@Base 10.22 --- pcre2-10.45.orig/debian/libpcre2-8-0-udeb.install +++ pcre2-10.45/debian/libpcre2-8-0-udeb.install @@ -0,0 +1 @@ +debian/tmp/usr/lib/*/libpcre2-8.so.* --- pcre2-10.45.orig/debian/libpcre2-8-0.install +++ pcre2-10.45/debian/libpcre2-8-0.install @@ -0,0 +1 @@ +debian/tmp/usr/lib/*/libpcre2-8.so.* --- pcre2-10.45.orig/debian/libpcre2-8-0.symbols +++ pcre2-10.45/debian/libpcre2-8-0.symbols @@ -0,0 +1,79 @@ +libpcre2-8.so.0 libpcre2-8-0 #MINVER# + pcre2_callout_enumerate_8@Base 10.22 + pcre2_code_copy_8@Base 10.22 + 
pcre2_code_copy_with_tables_8@Base 10.32 + pcre2_code_free_8@Base 10.22 + pcre2_compile_8@Base 10.22 + pcre2_compile_context_copy_8@Base 10.22 + pcre2_compile_context_create_8@Base 10.22 + pcre2_compile_context_free_8@Base 10.22 + pcre2_config_8@Base 10.22 + pcre2_convert_context_copy_8@Base 10.32 + pcre2_convert_context_create_8@Base 10.32 + pcre2_convert_context_free_8@Base 10.32 + pcre2_converted_pattern_free_8@Base 10.32 + pcre2_dfa_match_8@Base 10.22 + pcre2_general_context_copy_8@Base 10.22 + pcre2_general_context_create_8@Base 10.22 + pcre2_general_context_free_8@Base 10.22 + pcre2_get_error_message_8@Base 10.22 + pcre2_get_mark_8@Base 10.22 + pcre2_get_match_data_heapframes_size_8@Base 10.43 + pcre2_get_match_data_size_8@Base 10.34 + pcre2_get_ovector_count_8@Base 10.22 + pcre2_get_ovector_pointer_8@Base 10.22 + pcre2_get_startchar_8@Base 10.22 + pcre2_jit_compile_8@Base 10.22 + pcre2_jit_free_unused_memory_8@Base 10.22 + pcre2_jit_match_8@Base 10.22 + pcre2_jit_stack_assign_8@Base 10.22 + pcre2_jit_stack_create_8@Base 10.22 + pcre2_jit_stack_free_8@Base 10.22 + pcre2_maketables_8@Base 10.22 + pcre2_maketables_free_8@Base 10.34 + pcre2_match_8@Base 10.22 + pcre2_match_context_copy_8@Base 10.22 + pcre2_match_context_create_8@Base 10.22 + pcre2_match_context_free_8@Base 10.22 + pcre2_match_data_create_8@Base 10.22 + pcre2_match_data_create_from_pattern_8@Base 10.22 + pcre2_match_data_free_8@Base 10.22 + pcre2_pattern_convert_8@Base 10.32 + pcre2_pattern_info_8@Base 10.22 + pcre2_serialize_decode_8@Base 10.22 + pcre2_serialize_encode_8@Base 10.22 + pcre2_serialize_free_8@Base 10.22 + pcre2_serialize_get_number_of_codes_8@Base 10.22 + pcre2_set_bsr_8@Base 10.22 + pcre2_set_callout_8@Base 10.22 + pcre2_set_character_tables_8@Base 10.22 + pcre2_set_compile_extra_options_8@Base 10.32 + pcre2_set_compile_recursion_guard_8@Base 10.22 + pcre2_set_depth_limit_8@Base 10.32 + pcre2_set_glob_escape_8@Base 10.32 + pcre2_set_glob_separator_8@Base 10.32 + 
pcre2_set_heap_limit_8@Base 10.32 + pcre2_set_match_limit_8@Base 10.22 + pcre2_set_max_pattern_compiled_length_8@Base 10.44 + pcre2_set_max_pattern_length_8@Base 10.22 + pcre2_set_max_varlookbehind_8@Base 10.43 + pcre2_set_newline_8@Base 10.22 + pcre2_set_offset_limit_8@Base 10.22 + pcre2_set_optimize_8@Base 10.45~rc1 + pcre2_set_parens_nest_limit_8@Base 10.22 + pcre2_set_recursion_limit_8@Base 10.22 + pcre2_set_recursion_memory_management_8@Base 10.22 + pcre2_set_substitute_callout_8@Base 10.34 + pcre2_set_substitute_case_callout_8@Base 10.45~rc1 + pcre2_substitute_8@Base 10.22 + pcre2_substring_copy_byname_8@Base 10.22 + pcre2_substring_copy_bynumber_8@Base 10.22 + pcre2_substring_free_8@Base 10.22 + pcre2_substring_get_byname_8@Base 10.22 + pcre2_substring_get_bynumber_8@Base 10.22 + pcre2_substring_length_byname_8@Base 10.22 + pcre2_substring_length_bynumber_8@Base 10.22 + pcre2_substring_list_free_8@Base 10.22 + pcre2_substring_list_get_8@Base 10.22 + pcre2_substring_nametable_scan_8@Base 10.22 + pcre2_substring_number_from_name_8@Base 10.22 --- pcre2-10.45.orig/debian/libpcre2-dev.install +++ pcre2-10.45/debian/libpcre2-dev.install @@ -0,0 +1,6 @@ +debian/tmp/usr/include/* +debian/tmp/usr/lib/*/lib*.a +debian/tmp/usr/lib/*/lib*.so +debian/tmp/usr/lib/*/pkgconfig/* +debian/tmp/usr/bin/*-config +debian/tmp/usr/share/man/man3/* --- pcre2-10.45.orig/debian/libpcre2-dev.manpages +++ pcre2-10.45/debian/libpcre2-dev.manpages @@ -0,0 +1 @@ +debian/tmp/usr/share/man/man1/pcre2-config.1 --- pcre2-10.45.orig/debian/libpcre2-posix3.install +++ pcre2-10.45/debian/libpcre2-posix3.install @@ -0,0 +1 @@ +debian/tmp/usr/lib/*/libpcre2-posix.so.* --- pcre2-10.45.orig/debian/libpcre2-posix3.symbols +++ pcre2-10.45/debian/libpcre2-posix3.symbols @@ -0,0 +1,9 @@ +libpcre2-posix.so.3 libpcre2-posix3 #MINVER# + pcre2_regcomp@Base 10.34 + pcre2_regerror@Base 10.34 + pcre2_regexec@Base 10.34 + pcre2_regfree@Base 10.34 +#MISSING: 10.39-1# regcomp@Base 10.34 +#MISSING: 10.39-1# 
regerror@Base 10.34 +#MISSING: 10.39-1# regexec@Base 10.34 +#MISSING: 10.39-1# regfree@Base 10.34 --- pcre2-10.45.orig/debian/patches/CVE-2025-58050.patch +++ pcre2-10.45/debian/patches/CVE-2025-58050.patch @@ -0,0 +1,65 @@ +From a141712e5967d448c7ce13090ab530c8e3d82254 Mon Sep 17 00:00:00 2001 +From: Zoltan Herczeg +Date: Wed, 13 Aug 2025 07:17:30 +0000 +Subject: [PATCH] Restore buffer after an ACCEPT inside an scan substring block + +--- + src/pcre2_match.c | 3 +++ + testdata/testinput2 | 6 ++++++ + testdata/testoutput2 | 12 ++++++++++++ + 3 files changed, 21 insertions(+) + +diff --git a/src/pcre2_match.c b/src/pcre2_match.c +index 5adc03480..34a92eaa3 100644 +--- a/src/pcre2_match.c ++++ b/src/pcre2_match.c +@@ -5824,6 +5824,9 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; ++ mb->end_subject = Lsaved_end_subject; ++ mb->true_end_subject = mb->end_subject + Ltrue_end_extra; ++ mb->moptions = Lsaved_moptions; + break; + } + +diff --git a/testdata/testinput2 b/testdata/testinput2 +index b162c539e..aa9ff30a8 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -6672,6 +6672,12 @@ a)"xI + abxyz + efgxyz + ++/(a)(*scs:(1)a(*ACCEPT))bbb/ ++ abbb ++ ++/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/ ++ abbb ++ + # Tests for pcre2_set_optimize() + + /abc/I,optimization_none +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index de4752e2b..4fe19d902 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -19664,6 +19664,18 @@ No match + 2: + 3: + ++/(a)(*scs:(1)a(*ACCEPT))bbb/ ++ abbb ++ 0: abbb ++ 1: a ++ ++/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/ ++ abbb ++ 0: abb ++ 1: a ++ 2: b ++ 3: b ++ + # Tests for pcre2_set_optimize() + + /abc/I,optimization_none --- pcre2-10.45.orig/debian/patches/series +++ pcre2-10.45/debian/patches/series @@ -0,0 +1 @@ +CVE-2025-58050.patch --- 
pcre2-10.45.orig/debian/pcre2-utils.install +++ pcre2-10.45/debian/pcre2-utils.install @@ -0,0 +1,4 @@ +debian/tmp/usr/bin/pcre2test +debian/tmp/usr/bin/pcre2grep +debian/tmp/usr/share/man/man1/pcre2test.1 +debian/tmp/usr/share/man/man1/pcre2grep.1 --- pcre2-10.45.orig/debian/rules +++ pcre2-10.45/debian/rules @@ -0,0 +1,32 @@ +#!/usr/bin/make -f +# See debhelper(7) +# output every command that modifies files on the build system. +#DH_VERBOSE = 1 + +# see dpkg-buildflags(1) +DPKG_EXPORT_BUILDFLAGS = 1 +include /usr/share/dpkg/default.mk +# prefix names of functions in libpcreposix with PCRE2 to avoid clash with +# ones in libc. +export DEB_CFLAGS_MAINT_APPEND = -Dregcomp=PCRE2regcomp -Dregexec=PCRE2regexec -Dregerror=PCRE2regerror -Dregfree=PCRE2regfree + + +deb_maint_conf_args = --enable-pcre2-16 --enable-pcre2-32 --disable-pcre2grep-callout +#enable JIT only on architectures that support it (see pcre2jit.3) +ifneq ($(filter amd64 armhf arm64 i386 loong64 mips mipsel mips64el powerpc ppc64 ppc64el riscv64 s390x, $(DEB_HOST_ARCH)),) +deb_maint_conf_args +=--enable-jit +else +deb_maint_conf_args +=--disable-jit +endif + +%: + dh $@ --parallel --with autoreconf + +override_dh_auto_configure: + dh_auto_configure -- $(deb_maint_conf_args) + +override_dh_makeshlibs: + dh_makeshlibs -V --add-udeb=libpcre2-8-0-udeb + +override_dh_strip: + dh_strip --dbgsym-migration='libpcre2-dbg (<< 10.34-2~)' --- pcre2-10.45.orig/debian/source/format +++ pcre2-10.45/debian/source/format @@ -0,0 +1 @@ +1.0 --- pcre2-10.45.orig/debian/watch +++ pcre2-10.45/debian/watch @@ -0,0 +1,2 @@ +version=4 + https://github.com/PCRE2Project/pcre2/releases .*/?(\d{2}\.\d{2})\.tar\.gz --- pcre2-10.45.orig/pcre2-config.in +++ pcre2-10.45/pcre2-config.in @@ -28,19 +28,8 @@ fi libR= -case `uname -s` in - *SunOS*) - libR=" -R@libdir@" - ;; - *BSD*) - libR=" -Wl,-R@libdir@" - ;; -esac libS= -if test @libdir@ != /usr/lib ; then - libS=-L@libdir@ -fi while test $# -gt 0; do case "$1" in --- 
pcre2-10.45.orig/src/pcre2_match.c +++ pcre2-10.45/src/pcre2_match.c @@ -5824,6 +5824,9 @@ assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); Foffset_top = assert_accept_frame->offset_top; Fmark = assert_accept_frame->mark; + mb->end_subject = Lsaved_end_subject; + mb->true_end_subject = mb->end_subject + Ltrue_end_extra; + mb->moptions = Lsaved_moptions; break; } --- pcre2-10.45.orig/testdata/testinput2 +++ pcre2-10.45/testdata/testinput2 @@ -6672,6 +6672,12 @@ abxyz efgxyz +/(a)(*scs:(1)a(*ACCEPT))bbb/ + abbb + +/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/ + abbb + # Tests for pcre2_set_optimize() /abc/I,optimization_none --- pcre2-10.45.orig/testdata/testoutput2 +++ pcre2-10.45/testdata/testoutput2 @@ -19664,6 +19664,18 @@ 2: 3: +/(a)(*scs:(1)a(*ACCEPT))bbb/ + abbb + 0: abbb + 1: a + +/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/ + abbb + 0: abb + 1: a + 2: b + 3: b + # Tests for pcre2_set_optimize() /abc/I,optimization_none