     1			/*    regcomp.c
     2			 */
     3			
     4			/*
     5			 * "A fair jaw-cracker dwarf-language must be."  --Samwise Gamgee
     6			 */
     7			
     8			/* This file contains functions for compiling a regular expression.  See
     9			 * also regexec.c which funnily enough, contains functions for executing
    10			 * a regular expression.
    11			 *
    12			 * This file is also copied at build time to ext/re/re_comp.c, where
    13			 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
    14			 * This causes the main functions to be compiled under new names and with
    15			 * debugging support added, which makes "use re 'debug'" work.
    16			 */
    17			
    18			/* NOTE: this is derived from Henry Spencer's regexp code, and should not
    19			 * confused with the original package (see point 3 below).  Thanks, Henry!
    20			 */
    21			
    22			/* Additional note: this code is very heavily munged from Henry's version
    23			 * in places.  In some spots I've traded clarity for efficiency, so don't
    24			 * blame Henry for some of the lack of readability.
    25			 */
    26			
    27			/* The names of the functions have been changed from regcomp and
    28			 * regexec to  pregcomp and pregexec in order to avoid conflicts
    29			 * with the POSIX routines of the same names.
    30			*/
    31			
    32			#ifdef PERL_EXT_RE_BUILD
    33			/* need to replace pregcomp et al, so enable that */
    34			#  ifndef PERL_IN_XSUB_RE
    35			#    define PERL_IN_XSUB_RE
    36			#  endif
    37			/* need access to debugger hooks */
    38			#  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
    39			#    define DEBUGGING
    40			#  endif
    41			#endif
    42			
    43			#ifdef PERL_IN_XSUB_RE
    44			/* We *really* need to overwrite these symbols: */
    45			#  define Perl_pregcomp my_regcomp
    46			#  define Perl_regdump my_regdump
    47			#  define Perl_regprop my_regprop
    48			#  define Perl_pregfree my_regfree
    49			#  define Perl_re_intuit_string my_re_intuit_string
    50			/* *These* symbols are masked to allow static link. */
    51			#  define Perl_regnext my_regnext
    52			#  define Perl_save_re_context my_save_re_context
    53			#  define Perl_reginitcolors my_reginitcolors
    54			
    55			#  define PERL_NO_GET_CONTEXT
    56			#endif
    57			
    58			/*
    59			 * pregcomp and pregexec -- regsub and regerror are not used in perl
    60			 *
    61			 *	Copyright (c) 1986 by University of Toronto.
    62			 *	Written by Henry Spencer.  Not derived from licensed software.
    63			 *
    64			 *	Permission is granted to anyone to use this software for any
    65			 *	purpose on any computer system, and to redistribute it freely,
    66			 *	subject to the following restrictions:
    67			 *
    68			 *	1. The author is not responsible for the consequences of use of
    69			 *		this software, no matter how awful, even if they arise
    70			 *		from defects in it.
    71			 *
    72			 *	2. The origin of this software must not be misrepresented, either
    73			 *		by explicit claim or by omission.
    74			 *
    75			 *	3. Altered versions must be plainly marked as such, and must not
    76			 *		be misrepresented as being the original software.
    77			 *
    78			 *
    79			 ****    Alterations to Henry's code are...
    80			 ****
    81			 ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
    82			 ****    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
    83			 ****
    84			 ****    You may distribute under the terms of either the GNU General Public
    85			 ****    License or the Artistic License, as specified in the README file.
    86			
    87			 *
    88			 * Beware that some of this code is subtly aware of the way operator
    89			 * precedence is structured in regular expressions.  Serious changes in
    90			 * regular-expression syntax might require a total rethink.
    91			 */
    92			#include "EXTERN.h"
    93			#define PERL_IN_REGCOMP_C
    94			#include "perl.h"
    95			
    96			#ifndef PERL_IN_XSUB_RE
    97			#  include "INTERN.h"
    98			#endif
    99			
   100			#define REG_COMP_C
   101			#include "regcomp.h"
   102			
   103			#ifdef op
   104			#undef op
   105			#endif /* op */
   106			
   107			#ifdef MSDOS
   108			#  if defined(BUGGY_MSC6)
   109			 /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
   110			#    pragma optimize("a",off)
   111			 /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
   112			#    pragma optimize("w",on )
   113			#  endif /* BUGGY_MSC6 */
   114			#endif /* MSDOS */
   115			
   116			#ifndef STATIC
   117			#define	STATIC	static
   118			#endif
   119			
   120			typedef struct RExC_state_t {
   121			    U32		flags;			/* are we folding, multilining? */
   122			    char	*precomp;		/* uncompiled string. */
   123			    regexp	*rx;
   124			    char	*start;			/* Start of input for compile */
   125			    char	*end;			/* End of input for compile */
   126			    char	*parse;			/* Input-scan pointer. */
   127			    I32		whilem_seen;		/* number of WHILEM in this expr */
   128			    regnode	*emit_start;		/* Start of emitted-code area */
   129			    regnode	*emit;			/* Code-emit pointer; &regdummy = don't = compiling */
   130			    I32		naughty;		/* How bad is this pattern? */
   131			    I32		sawback;		/* Did we see \1, ...? */
   132			    U32		seen;
   133			    I32		size;			/* Code size. */
   134			    I32		npar;			/* () count. */
   135			    I32		extralen;
   136			    I32		seen_zerolen;
   137			    I32		seen_evals;
   138			    I32		utf8;
   139			#if ADD_TO_REGEXEC
   140			    char 	*starttry;		/* -Dr: where regtry was called. */
   141			#define RExC_starttry	(pRExC_state->starttry)
   142			#endif
   143			} RExC_state_t;
   144			
   145			#define RExC_flags	(pRExC_state->flags)
   146			#define RExC_precomp	(pRExC_state->precomp)
   147			#define RExC_rx		(pRExC_state->rx)
   148			#define RExC_start	(pRExC_state->start)
   149			#define RExC_end	(pRExC_state->end)
   150			#define RExC_parse	(pRExC_state->parse)
   151			#define RExC_whilem_seen	(pRExC_state->whilem_seen)
   152			#define RExC_offsets	(pRExC_state->rx->offsets) /* I am not like the others */
   153			#define RExC_emit	(pRExC_state->emit)
   154			#define RExC_emit_start	(pRExC_state->emit_start)
   155			#define RExC_naughty	(pRExC_state->naughty)
   156			#define RExC_sawback	(pRExC_state->sawback)
   157			#define RExC_seen	(pRExC_state->seen)
   158			#define RExC_size	(pRExC_state->size)
   159			#define RExC_npar	(pRExC_state->npar)
   160			#define RExC_extralen	(pRExC_state->extralen)
   161			#define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
   162			#define RExC_seen_evals	(pRExC_state->seen_evals)
   163			#define RExC_utf8	(pRExC_state->utf8)
   164			
   165			#define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
   166			#define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
   167				((*s) == '{' && regcurly(s)))
   168			
   169			#ifdef SPSTART
   170			#undef SPSTART		/* dratted cpp namespace... */
   171			#endif
   172			/*
   173			 * Flags to be passed up and down.
   174			 */
   175			#define	WORST		0	/* Worst case. */
   176			#define	HASWIDTH	0x1	/* Known to match non-null strings. */
   177			#define	SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
   178			#define	SPSTART		0x4	/* Starts with * or +. */
   179			#define TRYAGAIN	0x8	/* Weeded out a declaration. */
   180			
   181			/* Length of a variant. */
   182			
   183			typedef struct scan_data_t {
   184			    I32 len_min;
   185			    I32 len_delta;
   186			    I32 pos_min;
   187			    I32 pos_delta;
   188			    SV *last_found;
   189			    I32 last_end;			/* min value, <0 unless valid. */
   190			    I32 last_start_min;
   191			    I32 last_start_max;
   192			    SV **longest;			/* Either &l_fixed, or &l_float. */
   193			    SV *longest_fixed;
   194			    I32 offset_fixed;
   195			    SV *longest_float;
   196			    I32 offset_float_min;
   197			    I32 offset_float_max;
   198			    I32 flags;
   199			    I32 whilem_c;
   200			    I32 *last_closep;
   201			    struct regnode_charclass_class *start_class;
   202			} scan_data_t;
   203			
   204			/*
   205			 * Forward declarations for pregcomp()'s friends.
   206			 */
   207			
   208			static const scan_data_t zero_scan_data =
   209			  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   210			
   211			#define SF_BEFORE_EOL		(SF_BEFORE_SEOL|SF_BEFORE_MEOL)
   212			#define SF_BEFORE_SEOL		0x1
   213			#define SF_BEFORE_MEOL		0x2
   214			#define SF_FIX_BEFORE_EOL	(SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
   215			#define SF_FL_BEFORE_EOL	(SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
   216			
   217			#ifdef NO_UNARY_PLUS
   218			#  define SF_FIX_SHIFT_EOL	(0+2)
   219			#  define SF_FL_SHIFT_EOL		(0+4)
   220			#else
   221			#  define SF_FIX_SHIFT_EOL	(+2)
   222			#  define SF_FL_SHIFT_EOL		(+4)
   223			#endif
   224			
   225			#define SF_FIX_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
   226			#define SF_FIX_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
   227			
   228			#define SF_FL_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
   229			#define SF_FL_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
   230			#define SF_IS_INF		0x40
   231			#define SF_HAS_PAR		0x80
   232			#define SF_IN_PAR		0x100
   233			#define SF_HAS_EVAL		0x200
   234			#define SCF_DO_SUBSTR		0x400
   235			#define SCF_DO_STCLASS_AND	0x0800
   236			#define SCF_DO_STCLASS_OR	0x1000
   237			#define SCF_DO_STCLASS		(SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
   238			#define SCF_WHILEM_VISITED_POS	0x2000
   239			
   240			#define UTF (RExC_utf8 != 0)
   241			#define LOC ((RExC_flags & PMf_LOCALE) != 0)
   242			#define FOLD ((RExC_flags & PMf_FOLD) != 0)
   243			
   244			#define OOB_UNICODE		12345678
   245			#define OOB_NAMEDCLASS		-1
   246			
   247			#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
   248			#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
   249			
   250			
   251			/* length of regex to show in messages that don't mark a position within */
   252			#define RegexLengthToShowInErrorMessages 127
   253			
   254			/*
   255			 * If MARKER[12] are adjusted, be sure to adjust the constants at the top
   256			 * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
   257			 * op/pragma/warn/regcomp.
   258			 */
   259			#define MARKER1 "<-- HERE"    /* marker as it appears in the description */
   260			#define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
   261			
   262			#define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
   263			
   264			/*
   265			 * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
   266			 * arg. Show regex, up to a maximum length. If it's too long, chop and add
   267			 * "...".
   268			 */
   269			#define	FAIL(msg) STMT_START {						\
   270			    const char *ellipses = "";						\
   271			    IV len = RExC_end - RExC_precomp;					\
   272												\
   273			    if (!SIZE_ONLY)							\
   274				SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);			\
   275			    if (len > RegexLengthToShowInErrorMessages) {			\
   276				/* chop 10 shorter than the max, to ensure meaning of "..." */	\
   277				len = RegexLengthToShowInErrorMessages - 10;			\
   278				ellipses = "...";						\
   279			    }									\
   280			    Perl_croak(aTHX_ "%s in regex m/%.*s%s/",				\
   281				    msg, (int)len, RExC_precomp, ellipses);			\
   282			} STMT_END
   283			
   284			/*
   285			 * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
   286			 * args. Show regex, up to a maximum length. If it's too long, chop and add
   287			 * "...".
   288			 */
   289			#define	FAIL2(pat,msg) STMT_START {					\
   290			    const char *ellipses = "";						\
   291			    IV len = RExC_end - RExC_precomp;					\
   292												\
   293			    if (!SIZE_ONLY)							\
   294				SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);			\
   295			    if (len > RegexLengthToShowInErrorMessages) {			\
   296				/* chop 10 shorter than the max, to ensure meaning of "..." */	\
   297				len = RegexLengthToShowInErrorMessages - 10;			\
   298				ellipses = "...";						\
   299			    }									\
   300			    S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/",			\
   301				    msg, (int)len, RExC_precomp, ellipses);			\
   302			} STMT_END
   303			
   304			
   305			/*
   306			 * Simple_vFAIL -- like FAIL, but marks the current location in the scan
   307			 */
   308			#define	Simple_vFAIL(m) STMT_START {					\
   309			    const IV offset = RExC_parse - RExC_precomp;			\
   310			    Perl_croak(aTHX_ "%s" REPORT_LOCATION,				\
   311				    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
   312			} STMT_END
   313			
   314			/*
   315			 * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
   316			 */
   317			#define	vFAIL(m) STMT_START {				\
   318			    if (!SIZE_ONLY)					\
   319				SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
   320			    Simple_vFAIL(m);					\
   321			} STMT_END
   322			
   323			/*
   324			 * Like Simple_vFAIL(), but accepts two arguments.
   325			 */
   326			#define	Simple_vFAIL2(m,a1) STMT_START {			\
   327			    const IV offset = RExC_parse - RExC_precomp;			\
   328			    S_re_croak2(aTHX_ m, REPORT_LOCATION, a1,			\
   329				    (int)offset, RExC_precomp, RExC_precomp + offset);	\
   330			} STMT_END
   331			
   332			/*
   333			 * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
   334			 */
   335			#define	vFAIL2(m,a1) STMT_START {			\
   336			    if (!SIZE_ONLY)					\
   337				SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
   338			    Simple_vFAIL2(m, a1);				\
   339			} STMT_END
   340			
   341			
   342			/*
   343			 * Like Simple_vFAIL(), but accepts three arguments.
   344			 */
   345			#define	Simple_vFAIL3(m, a1, a2) STMT_START {			\
   346			    const IV offset = RExC_parse - RExC_precomp;		\
   347			    S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2,		\
   348				    (int)offset, RExC_precomp, RExC_precomp + offset);	\
   349			} STMT_END
   350			
   351			/*
   352			 * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
   353			 */
   354			#define	vFAIL3(m,a1,a2) STMT_START {			\
   355			    if (!SIZE_ONLY)					\
   356				SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
   357			    Simple_vFAIL3(m, a1, a2);				\
   358			} STMT_END
   359			
   360			/*
   361			 * Like Simple_vFAIL(), but accepts four arguments.
   362			 */
   363			#define	Simple_vFAIL4(m, a1, a2, a3) STMT_START {		\
   364			    const IV offset = RExC_parse - RExC_precomp;		\
   365			    S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,		\
   366				    (int)offset, RExC_precomp, RExC_precomp + offset);	\
   367			} STMT_END
   368			
   369			#define	vWARN(loc,m) STMT_START {					\
   370			    const IV offset = loc - RExC_precomp;				\
   371			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,	\
   372				    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
   373			} STMT_END
   374			
   375			#define	vWARNdep(loc,m) STMT_START {					\
   376			    const IV offset = loc - RExC_precomp;				\
   377			    Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),		\
   378				    "%s" REPORT_LOCATION,					\
   379				    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
   380			} STMT_END
   381			
   382			
   383			#define	vWARN2(loc, m, a1) STMT_START {					\
   384			    const IV offset = loc - RExC_precomp;				\
   385			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
   386				    a1, (int)offset, RExC_precomp, RExC_precomp + offset);	\
   387			} STMT_END
   388			
   389			#define	vWARN3(loc, m, a1, a2) STMT_START {				\
   390			    const IV offset = loc - RExC_precomp;				\
   391			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
   392				    a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);	\
   393			} STMT_END
   394			
   395			#define	vWARN4(loc, m, a1, a2, a3) STMT_START {				\
   396			    const IV offset = loc - RExC_precomp;				\
   397			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
   398				    a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
   399			} STMT_END
   400			
   401			#define	vWARN5(loc, m, a1, a2, a3, a4) STMT_START {			\
   402			    const IV offset = loc - RExC_precomp;				\
   403			    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
   404				    a1, a2, a3, a4, (int)offset, RExC_precomp, RExC_precomp + offset); \
   405			} STMT_END
   406			
   407			
   408			/* Allow for side effects in s */
   409			#define REGC(c,s) STMT_START {			\
   410			    if (!SIZE_ONLY) *(s) = (c); else (void)(s);	\
   411			} STMT_END
   412			
   413			/* Macros for recording node offsets.   20001227 mjd@plover.com 
   414			 * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
   415			 * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
   416			 * Element 0 holds the number n.
   417			 */
   418			
   419			#define MJD_OFFSET_DEBUG(x)
   420			/* #define MJD_OFFSET_DEBUG(x) DEBUG_r(Perl_warn_nocontext x) */
   421			
   422			
   423			#define Set_Node_Offset_To_R(node,byte) STMT_START {			\
   424			    if (! SIZE_ONLY) {							\
   425				MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",		\
   426					__LINE__, (node), (byte)));				\
   427				if((node) < 0) {						\
   428				    Perl_croak(aTHX_ "value of node is %d in Offset macro", node); \
   429				} else {							\
   430				    RExC_offsets[2*(node)-1] = (byte);				\
   431				}								\
   432			    }									\
   433			} STMT_END
   434			
   435			#define Set_Node_Offset(node,byte) \
   436			    Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
   437			#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
   438			
   439			#define Set_Node_Length_To_R(node,len) STMT_START {			\
   440			    if (! SIZE_ONLY) {							\
   441				MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",		\
   442					__LINE__, (node), (len)));				\
   443				if((node) < 0) {						\
   444				    Perl_croak(aTHX_ "value of node is %d in Length macro", node); \
   445				} else {							\
   446				    RExC_offsets[2*(node)] = (len);				\
   447				}								\
   448			    }									\
   449			} STMT_END
   450			
   451			#define Set_Node_Length(node,len) \
   452			    Set_Node_Length_To_R((node)-RExC_emit_start, len)
   453			#define Set_Cur_Node_Length(len) Set_Node_Length(RExC_emit, len)
   454			#define Set_Node_Cur_Length(node) \
   455			    Set_Node_Length(node, RExC_parse - parse_start)
   456			
   457			/* Get offsets and lengths */
   458			#define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
   459			#define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
   460			
   461			static void clear_re(pTHX_ void *r);
   462			
   463			/* Mark that we cannot extend a found fixed substring at this point.
   464			   Updata the longest found anchored substring and the longest found
   465			   floating substrings if needed. */
   466			
   467			STATIC void
   468			S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
   469	      ######    {
   470	      ######        const STRLEN l = CHR_SVLEN(data->last_found);
   471	      ######        const STRLEN old_l = CHR_SVLEN(*data->longest);
   472			
   473	      ######        if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
   474	      ######    	SvSetMagicSV(*data->longest, data->last_found);
   475	      ######    	if (*data->longest == data->longest_fixed) {
   476	      ######    	    data->offset_fixed = l ? data->last_start_min : data->pos_min;
   477	      ######    	    if (data->flags & SF_BEFORE_EOL)
   478	      ######    		data->flags
   479					    |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
   480				    else
   481	      ######    		data->flags &= ~SF_FIX_BEFORE_EOL;
   482				}
   483				else {
   484	      ######    	    data->offset_float_min = l ? data->last_start_min : data->pos_min;
   485	      ######    	    data->offset_float_max = (l
   486							      ? data->last_start_max
   487							      : data->pos_min + data->pos_delta);
   488	      ######    	    if ((U32)data->offset_float_max > (U32)I32_MAX)
   489	      ######    		data->offset_float_max = I32_MAX;
   490	      ######    	    if (data->flags & SF_BEFORE_EOL)
   491	      ######    		data->flags
   492					    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
   493				    else
   494	      ######    		data->flags &= ~SF_FL_BEFORE_EOL;
   495				}
   496			    }
   497	      ######        SvCUR_set(data->last_found, 0);
   498			    {
   499	      ######    	SV * const sv = data->last_found;
   500	      ######    	MAGIC * const mg =
   501	      ######    	    SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
   502	      ######    	if (mg && mg->mg_len > 0)
   503	      ######    	    mg->mg_len = 0;
   504			    }
   505	      ######        data->last_end = -1;
   506	      ######        data->flags &= ~SF_BEFORE_EOL;
   507			}
   508			
   509			/* Can match anything (initialization) */
   510			STATIC void
   511			S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
   512	      ######    {
   513	      ######        ANYOF_CLASS_ZERO(cl);
   514	      ######        ANYOF_BITMAP_SETALL(cl);
   515	      ######        cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
   516	      ######        if (LOC)
   517	      ######    	cl->flags |= ANYOF_LOCALE;
   518			}
   519			
   520			/* Can match anything (initialization) */
   521			STATIC int
   522			S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl)
   523	      ######    {
   524	      ######        int value;
   525			
   526	      ######        for (value = 0; value <= ANYOF_MAX; value += 2)
   527	      ######    	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
   528	      ######    	    return 1;
   529	      ######        if (!(cl->flags & ANYOF_UNICODE_ALL))
   530	      ######    	return 0;
   531	      ######        if (!ANYOF_BITMAP_TESTALLSET(cl))
   532	      ######    	return 0;
   533	      ######        return 1;
   534			}
   535			
   536			/* Can match anything (initialization) */
   537			STATIC void
   538			S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
   539	      ######    {
   540	      ######        Zero(cl, 1, struct regnode_charclass_class);
   541	      ######        cl->type = ANYOF;
   542	      ######        cl_anything(pRExC_state, cl);
   543			}
   544			
   545			STATIC void
   546			S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
   547	      ######    {
   548	      ######        Zero(cl, 1, struct regnode_charclass_class);
   549	      ######        cl->type = ANYOF;
   550	      ######        cl_anything(pRExC_state, cl);
   551	      ######        if (LOC)
   552	      ######    	cl->flags |= ANYOF_LOCALE;
   553			}
   554			
   555			/* 'And' a given class with another one.  Can create false positives */
   556			/* We assume that cl is not inverted */
   557			STATIC void
   558			S_cl_and(pTHX_ struct regnode_charclass_class *cl,
   559				const struct regnode_charclass_class *and_with)
   560	      ######    {
   561	      ######        if (!(and_with->flags & ANYOF_CLASS)
   562				&& !(cl->flags & ANYOF_CLASS)
   563				&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
   564				&& !(and_with->flags & ANYOF_FOLD)
   565				&& !(cl->flags & ANYOF_FOLD)) {
   566	      ######    	int i;
   567			
   568	      ######    	if (and_with->flags & ANYOF_INVERT)
   569	      ######    	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
   570	      ######    		cl->bitmap[i] &= ~and_with->bitmap[i];
   571				else
   572	      ######    	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
   573	      ######    		cl->bitmap[i] &= and_with->bitmap[i];
   574			    } /* XXXX: logic is complicated otherwise, leave it along for a moment. */
   575	      ######        if (!(and_with->flags & ANYOF_EOS))
   576	      ######    	cl->flags &= ~ANYOF_EOS;
   577			
   578	      ######        if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
   579				!(and_with->flags & ANYOF_INVERT)) {
   580	      ######    	cl->flags &= ~ANYOF_UNICODE_ALL;
   581	      ######    	cl->flags |= ANYOF_UNICODE;
   582	      ######    	ARG_SET(cl, ARG(and_with));
   583			    }
   584	      ######        if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
   585				!(and_with->flags & ANYOF_INVERT))
   586	      ######    	cl->flags &= ~ANYOF_UNICODE_ALL;
   587	      ######        if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) &&
   588				!(and_with->flags & ANYOF_INVERT))
   589	      ######    	cl->flags &= ~ANYOF_UNICODE;
   590			}
   591			
   592			/* 'OR' a given class with another one.  Can create false positives */
   593			/* We assume that cl is not inverted */
   594			STATIC void
   595			S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
   596	      ######    {
   597	      ######        if (or_with->flags & ANYOF_INVERT) {
   598				/* We do not use
   599				 * (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
   600				 *   <= (B1 | !B2) | (CL1 | !CL2)
   601				 * which is wasteful if CL2 is small, but we ignore CL2:
   602				 *   (B1 | CL1) | (!B2 & !CL2) <= (B1 | CL1) | !B2 = (B1 | !B2) | CL1
   603				 * XXXX Can we handle case-fold?  Unclear:
   604				 *   (OK1(i) | OK1(i')) | !(OK1(i) | OK1(i')) =
   605				 *   (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
   606				 */
   607	      ######    	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
   608				     && !(or_with->flags & ANYOF_FOLD)
   609				     && !(cl->flags & ANYOF_FOLD) ) {
   610	      ######    	    int i;
   611			
   612	      ######    	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
   613	      ######    		cl->bitmap[i] |= ~or_with->bitmap[i];
   614				} /* XXXX: logic is complicated otherwise */
   615				else {
   616	      ######    	    cl_anything(pRExC_state, cl);
   617				}
   618			    } else {
   619				/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
   620	      ######    	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
   621				     && (!(or_with->flags & ANYOF_FOLD)
   622					 || (cl->flags & ANYOF_FOLD)) ) {
   623	      ######    	    int i;
   624			
   625				    /* OR char bitmap and class bitmap separately */
   626	      ######    	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
   627	      ######    		cl->bitmap[i] |= or_with->bitmap[i];
   628	      ######    	    if (or_with->flags & ANYOF_CLASS) {
   629	      ######    		for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
   630	      ######    		    cl->classflags[i] |= or_with->classflags[i];
   631	      ######    		cl->flags |= ANYOF_CLASS;
   632				    }
   633				}
   634				else { /* XXXX: logic is complicated, leave it along for a moment. */
   635	      ######    	    cl_anything(pRExC_state, cl);
   636				}
   637			    }
   638	      ######        if (or_with->flags & ANYOF_EOS)
   639	      ######    	cl->flags |= ANYOF_EOS;
   640			
   641	      ######        if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
   642				ARG(cl) != ARG(or_with)) {
   643	      ######    	cl->flags |= ANYOF_UNICODE_ALL;
   644	      ######    	cl->flags &= ~ANYOF_UNICODE;
   645			    }
   646	      ######        if (or_with->flags & ANYOF_UNICODE_ALL) {
   647	      ######    	cl->flags |= ANYOF_UNICODE_ALL;
   648	      ######    	cl->flags &= ~ANYOF_UNICODE;
   649			    }
   650			}
   651			
   652			/*
   653			
   654			 make_trie(startbranch,first,last,tail,flags)
   655			  startbranch: the first branch in the whole branch sequence
   656			  first      : start branch of sequence of branch-exact nodes.
   657				       May be the same as startbranch
   658			  last       : Thing following the last branch.
   659				       May be the same as tail.
   660			  tail       : item following the branch sequence
   661			  flags      : currently the OP() type we will be building one of /EXACT(|F|Fl)/
   662			
   663			Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
   664			
   665			A trie is an N'ary tree where the branches are determined by digital
   666			decomposition of the key. IE, at the root node you look up the 1st character and
   667			follow that branch repeat until you find the end of the branches. Nodes can be
   668			marked as "accepting" meaning they represent a complete word. Eg:
   669			
   670			  /he|she|his|hers/
   671			
   672			would convert into the following structure. Numbers represent states, letters
   673			following numbers represent valid transitions on the letter from that state, if
   674			the number is in square brackets it represents an accepting state, otherwise it
   675			will be in parenthesis.
   676			
   677			      +-h->+-e->[3]-+-r->(8)-+-s->[9]
   678			      |    |
   679			      |   (2)
   680			      |    |
   681			     (1)   +-i->(6)-+-s->[7]
   682			      |
   683			      +-s->(3)-+-h->(4)-+-e->[5]
   684			
   685			      Accept Word Mapping: 3=>1 (he),5=>2 (she), 7=>3 (his), 9=>4 (hers)
   686			
   687			This shows that when matching against the string 'hers' we will begin at state 1
   688			read 'h' and move to state 2, read 'e' and move to state 3 which is accepting,
   689			then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
   690			is also accepting. Thus we know that we can match both 'he' and 'hers' with a
   691			single traverse. We store a mapping from accepting to state to which word was
   692			matched, and then when we have multiple possibilities we try to complete the
   693			rest of the regex in the order in which they occured in the alternation.
   694			
   695			The only prior NFA like behaviour that would be changed by the TRIE support is
   696			the silent ignoring of duplicate alternations which are of the form:
   697			
   698			 / (DUPE|DUPE) X? (?{ ... }) Y /x
   699			
   700			Thus EVAL blocks follwing a trie may be called a different number of times with
   701			and without the optimisation. With the optimisations dupes will be silently
   702			ignored. This inconsistant behaviour of EVAL type nodes is well established as
   703			the following demonstrates:
   704			
   705			 'words'=~/(word|word|word)(?{ print $1 })[xyz]/
   706			
   707			which prints out 'word' three times, but
   708			
   709			 'words'=~/(word|word|word)(?{ print $1 })S/
   710			
   711			which doesnt print it out at all. This is due to other optimisations kicking in.
   712			
   713			Example of what happens on a structural level:
   714			
   715			The regexp /(ac|ad|ab)+/ will produce the folowing debug output:
   716			
   717			   1: CURLYM[1] {1,32767}(18)
   718			   5:   BRANCH(8)
   719			   6:     EXACT <ac>(16)
   720			   8:   BRANCH(11)
   721			   9:     EXACT <ad>(16)
   722			  11:   BRANCH(14)
   723			  12:     EXACT <ab>(16)
   724			  16:   SUCCEED(0)
   725			  17:   NOTHING(18)
   726			  18: END(0)
   727			
   728			This would be optimizable with startbranch=5, first=5, last=16, tail=16
   729			and should turn into:
   730			
   731			   1: CURLYM[1] {1,32767}(18)
   732			   5:   TRIE(16)
   733				[Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
   734				  <ac>
   735				  <ad>
   736				  <ab>
   737			  16:   SUCCEED(0)
   738			  17:   NOTHING(18)
   739			  18: END(0)
   740			
   741			Cases where tail != last would be like /(?foo|bar)baz/:
   742			
   743			   1: BRANCH(4)
   744			   2:   EXACT <foo>(8)
   745			   4: BRANCH(7)
   746			   5:   EXACT <bar>(8)
   747			   7: TAIL(8)
   748			   8: EXACT <baz>(10)
   749			  10: END(0)
   750			
   751			which would be optimizable with startbranch=1, first=1, last=7, tail=8
   752			and would end up looking like:
   753			
   754			    1: TRIE(8)
   755			      [Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
   756				<foo>
   757				<bar>
   758			   7: TAIL(8)
   759			   8: EXACT <baz>(10)
   760			  10: END(0)
   761			
   762			*/
   763			
   764			#define TRIE_DEBUG_CHAR                                                    \
   765			    DEBUG_TRIE_COMPILE_r({                                                 \
   766				SV *tmp;                                                           \
   767				if ( UTF ) {                                                       \
   768				    tmp = newSVpvn( "", 0 );                                       \
   769				    pv_uni_display( tmp, uc, len, 60, UNI_DISPLAY_REGEX );         \
   770				} else {                                                           \
   771				    tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc );               \
   772				}                                                                  \
   773				av_push( trie->revcharmap, tmp );                                  \
   774			    })
   775			
   776			#define TRIE_READ_CHAR STMT_START {                                           \
   777			    if ( UTF ) {                                                              \
   778				if ( folder ) {                                                       \
   779				    if ( foldlen > 0 ) {                                              \
   780				       uvc = utf8n_to_uvuni( scan, UTF8_MAXLEN, &len, uniflags );     \
   781				       foldlen -= len;                                                \
   782				       scan += len;                                                   \
   783				       len = 0;                                                       \
   784				    } else {                                                          \
   785					uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
   786					uvc = to_uni_fold( uvc, foldbuf, &foldlen );                  \
   787					foldlen -= UNISKIP( uvc );                                    \
   788					scan = foldbuf + UNISKIP( uvc );                              \
   789				    }                                                                 \
   790				} else {                                                              \
   791				    uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
   792				}                                                                     \
   793			    } else {                                                                  \
   794				uvc = (U32)*uc;                                                       \
   795				len = 1;                                                              \
   796			    }                                                                         \
   797			} STMT_END
   798			
   799			
   800			#define TRIE_LIST_ITEM(state,idx) (trie->states[state].trans.list)[ idx ]
   801			#define TRIE_LIST_CUR(state)  ( TRIE_LIST_ITEM( state, 0 ).forid )
   802			#define TRIE_LIST_LEN(state) ( TRIE_LIST_ITEM( state, 0 ).newstate )
   803			#define TRIE_LIST_USED(idx)  ( trie->states[state].trans.list ? (TRIE_LIST_CUR( idx ) - 1) : 0 )
   804			
   805			#define TRIE_LIST_PUSH(state,fid,ns) STMT_START {               \
   806			    if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) {    \
   807				TRIE_LIST_LEN( state ) *= 2;                            \
   808				Renew( trie->states[ state ].trans.list,                \
   809				       TRIE_LIST_LEN( state ), reg_trie_trans_le );     \
   810			    }                                                           \
   811			    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid;     \
   812			    TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns;   \
   813			    TRIE_LIST_CUR( state )++;                                   \
   814			} STMT_END
   815			
   816			#define TRIE_LIST_NEW(state) STMT_START {                       \
   817			    Newz( 1023, trie->states[ state ].trans.list,               \
   818				4, reg_trie_trans_le );                                 \
   819			     TRIE_LIST_CUR( state ) = 1;                                \
   820			     TRIE_LIST_LEN( state ) = 4;                                \
   821			} STMT_END
   822			
   823			STATIC I32
   824			S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *first, regnode *last, regnode *tail, U32 flags)
   825	      ######    {
   826			    dVAR;
   827			    /* first pass, loop through and scan words */
   828	      ######        reg_trie_data *trie;
   829	      ######        regnode *cur;
   830	      ######        const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
   831	      ######        STRLEN len = 0;
   832	      ######        UV uvc = 0;
   833	      ######        U16 curword = 0;
   834	      ######        U32 next_alloc = 0;
   835			    /* we just use folder as a flag in utf8 */
   836	      ######        const U8 * const folder = ( flags == EXACTF
   837			                       ? PL_fold
   838			                       : ( flags == EXACTFL
   839			                           ? PL_fold_locale
   840			                           : NULL
   841			                         )
   842	      ######                         );
   843			
   844	      ######        const U32 data_slot = add_data( pRExC_state, 1, "t" );
   845	      ######        SV *re_trie_maxbuff;
   846			
   847	      ######        GET_RE_DEBUG_FLAGS_DECL;
   848			
   849	      ######        Newz( 848200, trie, 1, reg_trie_data );
   850	      ######        trie->refcount = 1;
   851	      ######        RExC_rx->data->data[ data_slot ] = (void*)trie;
   852	      ######        Newz( 848201, trie->charmap, 256, U16 );
   853			    DEBUG_r({
   854			        trie->words = newAV();
   855			        trie->revcharmap = newAV();
   856	      ######        });
   857			
   858			
   859	      ######        re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
   860	      ######        if (!SvIOK(re_trie_maxbuff)) {
   861	      ######            sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
   862			    }
   863			
   864			    /*  -- First loop and Setup --
   865			
   866			       We first traverse the branches and scan each word to determine if it
   867			       contains widechars, and how many unique chars there are, this is
   868			       important as we have to build a table with at least as many columns as we
   869			       have unique chars.
   870			
   871			       We use an array of integers to represent the character codes 0..255
   872			       (trie->charmap) and we use a an HV* to store unicode characters. We use the
   873			       native representation of the character value as the key and IV's for the
   874			       coded index.
   875			
   876			       *TODO* If we keep track of how many times each character is used we can
   877			       remap the columns so that the table compression later on is more
   878			       efficient in terms of memory by ensuring most common value is in the
   879			       middle and the least common are on the outside.  IMO this would be better
   880			       than a most to least common mapping as theres a decent chance the most
   881			       common letter will share a node with the least common, meaning the node
   882			       will not be compressable. With a middle is most common approach the worst
   883			       case is when we have the least common nodes twice.
   884			
   885			     */
   886			
   887			
   888	      ######        for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
   889	      ######            regnode *noper = NEXTOPER( cur );
   890	      ######            const U8 *uc = (U8*)STRING( noper );
   891	      ######            const U8 * const e  = uc + STR_LEN( noper );
   892	      ######            STRLEN foldlen = 0;
   893	      ######            U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
   894	      ######            const U8 *scan = (U8*)NULL;
   895			
   896	      ######            for ( ; uc < e ; uc += len ) {
   897	      ######                trie->charcount++;
   898	      ######                TRIE_READ_CHAR;
   899	      ######                if ( uvc < 256 ) {
   900	      ######                    if ( !trie->charmap[ uvc ] ) {
   901	      ######                        trie->charmap[ uvc ]=( ++trie->uniquecharcount );
   902	      ######                        if ( folder )
   903	      ######                            trie->charmap[ folder[ uvc ] ] = trie->charmap[ uvc ];
   904	      ######                        TRIE_DEBUG_CHAR;
   905			                }
   906			            } else {
   907	      ######                    SV** svpp;
   908	      ######                    if ( !trie->widecharmap )
   909	      ######                        trie->widecharmap = newHV();
   910			
   911	      ######                    svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 );
   912			
   913	      ######                    if ( !svpp )
   914	      ######                        Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
   915			
   916	      ######                    if ( !SvTRUE( *svpp ) ) {
   917	      ######                        sv_setiv( *svpp, ++trie->uniquecharcount );
   918	      ######                        TRIE_DEBUG_CHAR;
   919			                }
   920			            }
   921			        }
   922	      ######            trie->wordcount++;
   923			    } /* end first pass */
   924			    DEBUG_TRIE_COMPILE_r(
   925			        PerlIO_printf( Perl_debug_log, "TRIE(%s): W:%d C:%d Uq:%d \n",
   926			                ( trie->widecharmap ? "UTF8" : "NATIVE" ), trie->wordcount,
   927			                (int)trie->charcount, trie->uniquecharcount )
   928	      ######        );
   929			
   930			
   931			    /*
   932			        We now know what we are dealing with in terms of unique chars and
   933			        string sizes so we can calculate how much memory a naive
   934			        representation using a flat table  will take. If it's over a reasonable
   935			        limit (as specified by ${^RE_TRIE_MAXBUF}) we use a more memory
   936			        conservative but potentially much slower representation using an array
   937			        of lists.
   938			
   939			        At the end we convert both representations into the same compressed
   940			        form that will be used in regexec.c for matching with. The latter
   941			        is a form that cannot be used to construct with but has memory
   942			        properties similar to the list form and access properties similar
   943			        to the table form making it both suitable for fast searches and
   944			        small enough that its feasable to store for the duration of a program.
   945			
   946			        See the comment in the code where the compressed table is produced
   947			        inplace from the flat tabe representation for an explanation of how
   948			        the compression works.
   949			
   950			    */
   951			
   952			
   953	      ######        if ( (IV)( ( trie->charcount + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
   954			        /*
   955			            Second Pass -- Array Of Lists Representation
   956			
   957			            Each state will be represented by a list of charid:state records
   958			            (reg_trie_trans_le) the first such element holds the CUR and LEN
   959			            points of the allocated array. (See defines above).
   960			
   961			            We build the initial structure using the lists, and then convert
   962			            it into the compressed table form which allows faster lookups
   963			            (but cant be modified once converted).
   964			
   965			
   966			        */
   967			
   968			
   969	      ######            STRLEN transcount = 1;
   970			
   971	      ######            Newz( 848204, trie->states, trie->charcount + 2, reg_trie_state );
   972	      ######            TRIE_LIST_NEW(1);
   973	      ######            next_alloc = 2;
   974			
   975	      ######            for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
   976			
   977	      ######            regnode *noper   = NEXTOPER( cur );
   978	      ######            U8 *uc           = (U8*)STRING( noper );
   979	      ######            const U8 * const e = uc + STR_LEN( noper );
   980	      ######            U32 state        = 1;         /* required init */
   981	      ######            U16 charid       = 0;         /* sanity init */
   982	      ######            U8 *scan         = (U8*)NULL; /* sanity init */
   983	      ######            STRLEN foldlen   = 0;         /* required init */
   984	      ######            U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
   985			
   986			
   987	      ######            for ( ; uc < e ; uc += len ) {
   988			
   989	      ######                TRIE_READ_CHAR;
   990			
   991	      ######                if ( uvc < 256 ) {
   992	      ######                    charid = trie->charmap[ uvc ];
   993			            } else {
   994	      ######                    SV** svpp=(SV**)NULL;
   995	      ######                    svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
   996	      ######                    if ( !svpp ) {
   997	      ######                        charid = 0;
   998			                } else {
   999	      ######                        charid=(U16)SvIV( *svpp );
  1000			                }
  1001			            }
  1002	      ######                if ( charid ) {
  1003			
  1004	      ######                    U16 check;
  1005	      ######                    U32 newstate = 0;
  1006			
  1007	      ######                    charid--;
  1008	      ######                    if ( !trie->states[ state ].trans.list ) {
  1009	      ######                        TRIE_LIST_NEW( state );
  1010			                }
  1011	      ######                    for ( check = 1; check <= TRIE_LIST_USED( state ); check++ ) {
  1012	      ######                        if ( TRIE_LIST_ITEM( state, check ).forid == charid ) {
  1013	      ######                            newstate = TRIE_LIST_ITEM( state, check ).newstate;
  1014	      ######                            break;
  1015			                    }
  1016					}
  1017	      ######    		if ( ! newstate ) {
  1018	      ######    		    newstate = next_alloc++;
  1019	      ######    		    TRIE_LIST_PUSH( state, charid, newstate );
  1020	      ######    		    transcount++;
  1021					}
  1022	      ######    		state = newstate;
  1023			            } else {
  1024	      ######                    Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
  1025			            }
  1026			            /* charid is now 0 if we dont know the char read, or nonzero if we do */
  1027			        }
  1028			
  1029	      ######            if ( !trie->states[ state ].wordnum ) {
  1030			            /* we havent inserted this word into the structure yet. */
  1031	      ######                trie->states[ state ].wordnum = ++curword;
  1032			
  1033			            DEBUG_r({
  1034			                /* store the word for dumping */
  1035			                SV* tmp = newSVpvn( STRING( noper ), STR_LEN( noper ) );
  1036			                if ( UTF ) SvUTF8_on( tmp );
  1037			                av_push( trie->words, tmp );
  1038	      ######                });
  1039			
  1040			        } else {
  1041			            /* Its a dupe. So ignore it. */
  1042			        }
  1043			
  1044			        } /* end second pass */
  1045			
  1046	      ######            trie->laststate = next_alloc;
  1047	      ######            Renew( trie->states, next_alloc, reg_trie_state );
  1048			
  1049			        DEBUG_TRIE_COMPILE_MORE_r({
  1050			            U32 state;
  1051			
  1052			            /* print out the table precompression.  */
  1053			
  1054			            PerlIO_printf( Perl_debug_log, "\nState :Word | Transition Data\n" );
  1055			            PerlIO_printf( Perl_debug_log,   "------:-----+-----------------" );
  1056			
  1057			            for( state=1 ; state < next_alloc ; state ++ ) {
  1058					U16 charid;
  1059			
  1060			                PerlIO_printf( Perl_debug_log, "\n %04"UVXf" :", (UV)state  );
  1061			                if ( ! trie->states[ state ].wordnum ) {
  1062			                    PerlIO_printf( Perl_debug_log, "%5s| ","");
  1063			                } else {
  1064			                    PerlIO_printf( Perl_debug_log, "W%04x| ",
  1065			                        trie->states[ state ].wordnum
  1066			                    );
  1067			                }
  1068			                for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
  1069			                    SV **tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
  1070			                    PerlIO_printf( Perl_debug_log, "%s:%3X=%04"UVXf" | ",
  1071			                        SvPV_nolen_const( *tmp ),
  1072			                        TRIE_LIST_ITEM(state,charid).forid,
  1073			                        (UV)TRIE_LIST_ITEM(state,charid).newstate
  1074			                    );
  1075			                }
  1076			
  1077			            }
  1078			            PerlIO_printf( Perl_debug_log, "\n\n" );
  1079	      ######            });
  1080			
  1081	      ######            Newz( 848203, trie->trans, transcount ,reg_trie_trans );
  1082			        {
  1083	      ######                U32 state;
  1084	      ######                U32 tp = 0;
  1085	      ######                U32 zp = 0;
  1086			
  1087			
  1088	      ######                for( state=1 ; state < next_alloc ; state ++ ) {
  1089	      ######                    U32 base=0;
  1090			
  1091			                /*
  1092			                DEBUG_TRIE_COMPILE_MORE_r(
  1093			                    PerlIO_printf( Perl_debug_log, "tp: %d zp: %d ",tp,zp)
  1094			                );
  1095			                */
  1096			
  1097	      ######                    if (trie->states[state].trans.list) {
  1098	      ######                        U16 minid=TRIE_LIST_ITEM( state, 1).forid;
  1099	      ######                        U16 maxid=minid;
  1100	      ######    		    U16 idx;
  1101			
  1102	      ######                        for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
  1103	      ######                            if ( TRIE_LIST_ITEM( state, idx).forid < minid ) {
  1104	      ######                                minid=TRIE_LIST_ITEM( state, idx).forid;
  1105	      ######                            } else if ( TRIE_LIST_ITEM( state, idx).forid > maxid ) {
  1106	      ######                                maxid=TRIE_LIST_ITEM( state, idx).forid;
  1107			                        }
  1108			                    }
  1109	      ######                        if ( transcount < tp + maxid - minid + 1) {
  1110	      ######                            transcount *= 2;
  1111	      ######                            Renew( trie->trans, transcount, reg_trie_trans );
  1112	      ######                            Zero( trie->trans + (transcount / 2), transcount / 2 , reg_trie_trans );
  1113			                    }
  1114	      ######                        base = trie->uniquecharcount + tp - minid;
  1115	      ######                        if ( maxid == minid ) {
  1116	      ######                            U32 set = 0;
  1117	      ######                            for ( ; zp < tp ; zp++ ) {
  1118	      ######                                if ( ! trie->trans[ zp ].next ) {
  1119	      ######                                    base = trie->uniquecharcount + zp - minid;
  1120	      ######                                    trie->trans[ zp ].next = TRIE_LIST_ITEM( state, 1).newstate;
  1121	      ######                                    trie->trans[ zp ].check = state;
  1122	      ######                                    set = 1;
  1123	      ######                                    break;
  1124			                            }
  1125			                        }
  1126	      ######                            if ( !set ) {
  1127	      ######                                trie->trans[ tp ].next = TRIE_LIST_ITEM( state, 1).newstate;
  1128	      ######                                trie->trans[ tp ].check = state;
  1129	      ######                                tp++;
  1130	      ######                                zp = tp;
  1131			                        }
  1132			                    } else {
  1133	      ######                            for ( idx=1; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
  1134	      ######                                U32 tid = base -  trie->uniquecharcount + TRIE_LIST_ITEM( state, idx ).forid;
  1135	      ######                                trie->trans[ tid ].next = TRIE_LIST_ITEM( state, idx ).newstate;
  1136	      ######                                trie->trans[ tid ].check = state;
  1137			                        }
  1138	      ######                            tp += ( maxid - minid + 1 );
  1139			                    }
  1140	      ######                        Safefree(trie->states[ state ].trans.list);
  1141			                }
  1142			                /*
  1143			                DEBUG_TRIE_COMPILE_MORE_r(
  1144			                    PerlIO_printf( Perl_debug_log, " base: %d\n",base);
  1145			                );
  1146			                */
  1147	      ######                    trie->states[ state ].trans.base=base;
  1148			            }
  1149	      ######                trie->lasttrans = tp + 1;
  1150			        }
  1151			    } else {
  1152			        /*
  1153			           Second Pass -- Flat Table Representation.
  1154			
  1155			           we dont use the 0 slot of either trans[] or states[] so we add 1 to each.
  1156			           We know that we will need Charcount+1 trans at most to store the data
  1157			           (one row per char at worst case) So we preallocate both structures
  1158			           assuming worst case.
  1159			
  1160			           We then construct the trie using only the .next slots of the entry
  1161			           structs.
  1162			
  1163			           We use the .check field of the first entry of the node  temporarily to
  1164			           make compression both faster and easier by keeping track of how many non
  1165			           zero fields are in the node.
  1166			
  1167			           Since trans are numbered from 1 any 0 pointer in the table is a FAIL
  1168			           transition.
  1169			
  1170			           There are two terms at use here: state as a TRIE_NODEIDX() which is a
  1171			           number representing the first entry of the node, and state as a
  1172			           TRIE_NODENUM() which is the trans number. state 1 is TRIE_NODEIDX(1) and
  1173			           TRIE_NODENUM(1), state 2 is TRIE_NODEIDX(2) and TRIE_NODENUM(3) if there
  1174			           are 2 entrys per node. eg:
  1175			
  1176			             A B       A B
  1177			          1. 2 4    1. 3 7
  1178			          2. 0 3    3. 0 5
  1179			          3. 0 0    5. 0 0
  1180			          4. 0 0    7. 0 0
  1181			
  1182			           The table is internally in the right hand, idx form. However as we also
  1183			           have to deal with the states array which is indexed by nodenum we have to
  1184			           use TRIE_NODENUM() to convert.
  1185			
  1186			        */
  1187			
  1188			        Newz( 848203, trie->trans, ( trie->charcount + 1 ) * trie->uniquecharcount + 1,
  1189	      ######                  reg_trie_trans );
  1190	      ######            Newz( 848204, trie->states, trie->charcount + 2, reg_trie_state );
  1191	      ######            next_alloc = trie->uniquecharcount + 1;
  1192			
  1193	      ######            for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
  1194			
  1195	      ######                regnode *noper   = NEXTOPER( cur );
  1196	      ######    	    const U8 *uc     = (U8*)STRING( noper );
  1197	      ######    	    const U8 * const e = uc + STR_LEN( noper );
  1198			
  1199	      ######                U32 state        = 1;         /* required init */
  1200			
  1201	      ######                U16 charid       = 0;         /* sanity init */
  1202	      ######                U32 accept_state = 0;         /* sanity init */
  1203	      ######                U8 *scan         = (U8*)NULL; /* sanity init */
  1204			
  1205	      ######                STRLEN foldlen   = 0;         /* required init */
  1206	      ######                U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
  1207			
  1208			
  1209	      ######                for ( ; uc < e ; uc += len ) {
  1210			
  1211	      ######                    TRIE_READ_CHAR;
  1212			
  1213	      ######                    if ( uvc < 256 ) {
  1214	      ######                        charid = trie->charmap[ uvc ];
  1215			                } else {
  1216	      ######                        SV** svpp=(SV**)NULL;
  1217	      ######                        svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
  1218	      ######                        if ( !svpp ) {
  1219	      ######                            charid = 0;
  1220			                    } else {
  1221	      ######                            charid=(U16)SvIV( *svpp );
  1222			                    }
  1223			                }
  1224	      ######                    if ( charid ) {
  1225	      ######                        charid--;
  1226	      ######                        if ( !trie->trans[ state + charid ].next ) {
  1227	      ######                            trie->trans[ state + charid ].next = next_alloc;
  1228	      ######                            trie->trans[ state ].check++;
  1229	      ######                            next_alloc += trie->uniquecharcount;
  1230			                    }
  1231	      ######                        state = trie->trans[ state + charid ].next;
  1232			                } else {
  1233	      ######                        Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %"IVdf, uvc );
  1234			                }
  1235			                /* charid is now 0 if we dont know the char read, or nonzero if we do */
  1236			            }
  1237			
  1238	      ######                accept_state = TRIE_NODENUM( state );
  1239	      ######                if ( !trie->states[ accept_state ].wordnum ) {
  1240			                /* we havent inserted this word into the structure yet. */
  1241	      ######                    trie->states[ accept_state ].wordnum = ++curword;
  1242			
  1243			                DEBUG_r({
  1244			                    /* store the word for dumping */
  1245			                    SV* tmp = newSVpvn( STRING( noper ), STR_LEN( noper ) );
  1246			                    if ( UTF ) SvUTF8_on( tmp );
  1247			                    av_push( trie->words, tmp );
  1248	      ######                    });
  1249			
  1250			            } else {
  1251			                /* Its a dupe. So ignore it. */
  1252			            }
  1253			
  1254			        } /* end second pass */
  1255			
  1256			        DEBUG_TRIE_COMPILE_MORE_r({
  1257			            /*
  1258			               print out the table precompression so that we can do a visual check
  1259			               that they are identical.
  1260			             */
  1261			            U32 state;
  1262			            U16 charid;
  1263			            PerlIO_printf( Perl_debug_log, "\nChar : " );
  1264			
  1265			            for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
  1266			                SV **tmp = av_fetch( trie->revcharmap, charid, 0);
  1267			                if ( tmp ) {
  1268			                  PerlIO_printf( Perl_debug_log, "%4.4s ", SvPV_nolen_const( *tmp ) );
  1269			                }
  1270			            }
  1271			
  1272			            PerlIO_printf( Perl_debug_log, "\nState+-" );
  1273			
  1274			            for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
  1275			                PerlIO_printf( Perl_debug_log, "%4s-", "----" );
  1276			            }
  1277			
  1278			            PerlIO_printf( Perl_debug_log, "\n" );
  1279			
  1280			            for( state=1 ; state < next_alloc ; state += trie->uniquecharcount ) {
  1281			
  1282			                PerlIO_printf( Perl_debug_log, "%04"UVXf" : ", (UV)TRIE_NODENUM( state ) );
  1283			
  1284			                for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
  1285			                    PerlIO_printf( Perl_debug_log, "%04"UVXf" ",
  1286			                        (UV)SAFE_TRIE_NODENUM( trie->trans[ state + charid ].next ) );
  1287			                }
  1288			                if ( ! trie->states[ TRIE_NODENUM( state ) ].wordnum ) {
  1289			                    PerlIO_printf( Perl_debug_log, " (%04"UVXf")\n", (UV)trie->trans[ state ].check );
  1290			                } else {
  1291			                    PerlIO_printf( Perl_debug_log, " (%04"UVXf") W%04X\n", (UV)trie->trans[ state ].check,
  1292			                    trie->states[ TRIE_NODENUM( state ) ].wordnum );
  1293			                }
  1294			            }
  1295			            PerlIO_printf( Perl_debug_log, "\n\n" );
  1296	      ######            });
  1297			        {
  1298			        /*
  1299			           * Inplace compress the table.*
  1300			
  1301			           For sparse data sets the table constructed by the trie algorithm will
  1302			           be mostly 0/FAIL transitions or to put it another way mostly empty.
  1303			           (Note that leaf nodes will not contain any transitions.)
  1304			
  1305			           This algorithm compresses the tables by eliminating most such
  1306			           transitions, at the cost of a modest bit of extra work during lookup:
  1307			
  1308			           - Each states[] entry contains a .base field which indicates the
  1309			           index in the state[] array wheres its transition data is stored.
  1310			
  1311			           - If .base is 0 there are no  valid transitions from that node.
  1312			
  1313			           - If .base is nonzero then charid is added to it to find an entry in
  1314			           the trans array.
  1315			
  1316			           -If trans[states[state].base+charid].check!=state then the
  1317			           transition is taken to be a 0/Fail transition. Thus if there are fail
  1318			           transitions at the front of the node then the .base offset will point
  1319			           somewhere inside the previous nodes data (or maybe even into a node
  1320			           even earlier), but the .check field determines if the transition is
  1321			           valid.
  1322			
  1323			           The following process inplace converts the table to the compressed
  1324			           table: We first do not compress the root node 1,and mark its all its
  1325			           .check pointers as 1 and set its .base pointer as 1 as well. This
  1326			           allows to do a DFA construction from the compressed table later, and
  1327			           ensures that any .base pointers we calculate later are greater than
  1328			           0.
  1329			
  1330			           - We set 'pos' to indicate the first entry of the second node.
  1331			
  1332			           - We then iterate over the columns of the node, finding the first and
  1333			           last used entry at l and m. We then copy l..m into pos..(pos+m-l),
  1334			           and set the .check pointers accordingly, and advance pos
  1335			           appropriately and repreat for the next node. Note that when we copy
  1336			           the next pointers we have to convert them from the original
  1337			           NODEIDX form to NODENUM form as the former is not valid post
  1338			           compression.
  1339			
  1340			           - If a node has no transitions used we mark its base as 0 and do not
  1341			           advance the pos pointer.
  1342			
  1343			           - If a node only has one transition we use a second pointer into the
  1344			           structure to fill in allocated fail transitions from other states.
  1345			           This pointer is independent of the main pointer and scans forward
  1346			           looking for null transitions that are allocated to a state. When it
  1347			           finds one it writes the single transition into the "hole".  If the
  1348			           pointer doesnt find one the single transition is appeneded as normal.
  1349			
  1350			           - Once compressed we can Renew/realloc the structures to release the
  1351			           excess space.
  1352			
  1353			           See "Table-Compression Methods" in sec 3.9 of the Red Dragon,
  1354			           specifically Fig 3.47 and the associated pseudocode.
  1355			
  1356			           demq
  1357			        */
  1358	      ######            const U32 laststate = TRIE_NODENUM( next_alloc );
  1359	      ######    	U32 state, charid;
  1360	      ######            U32 pos = 0, zp=0;
  1361	      ######            trie->laststate = laststate;
  1362			
  1363	      ######            for ( state = 1 ; state < laststate ; state++ ) {
  1364	      ######                U8 flag = 0;
  1365	      ######    	    const U32 stateidx = TRIE_NODEIDX( state );
  1366	      ######    	    const U32 o_used = trie->trans[ stateidx ].check;
  1367	      ######    	    U32 used = trie->trans[ stateidx ].check;
  1368	      ######                trie->trans[ stateidx ].check = 0;
  1369			
  1370	      ######                for ( charid = 0 ; used && charid < trie->uniquecharcount ; charid++ ) {
  1371	      ######                    if ( flag || trie->trans[ stateidx + charid ].next ) {
  1372	      ######                        if ( trie->trans[ stateidx + charid ].next ) {
  1373	      ######                            if (o_used == 1) {
  1374	      ######                                for ( ; zp < pos ; zp++ ) {
  1375	      ######                                    if ( ! trie->trans[ zp ].next ) {
  1376	      ######                                        break;
  1377			                                }
  1378			                            }
  1379	      ######                                trie->states[ state ].trans.base = zp + trie->uniquecharcount - charid ;
  1380	      ######                                trie->trans[ zp ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
  1381	      ######                                trie->trans[ zp ].check = state;
  1382	      ######                                if ( ++zp > pos ) pos = zp;
  1383	      ######                                break;
  1384			                        }
  1385	      ######                            used--;
  1386			                    }
  1387	      ######                        if ( !flag ) {
  1388	      ######                            flag = 1;
  1389	      ######                            trie->states[ state ].trans.base = pos + trie->uniquecharcount - charid ;
  1390			                    }
  1391	      ######                        trie->trans[ pos ].next = SAFE_TRIE_NODENUM( trie->trans[ stateidx + charid ].next );
  1392	      ######                        trie->trans[ pos ].check = state;
  1393	      ######                        pos++;
  1394			                }
  1395			            }
  1396			        }
  1397	      ######            trie->lasttrans = pos + 1;
  1398	      ######            Renew( trie->states, laststate + 1, reg_trie_state);
  1399			        DEBUG_TRIE_COMPILE_MORE_r(
  1400			                PerlIO_printf( Perl_debug_log,
  1401					    " Alloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
  1402					    (int)( ( trie->charcount + 1 ) * trie->uniquecharcount + 1 ),
  1403					    (IV)next_alloc,
  1404					    (IV)pos,
  1405			                    ( ( next_alloc - pos ) * 100 ) / (double)next_alloc );
  1406	      ######                );
  1407			
  1408			        } /* end table compress */
  1409			    }
  1410			    /* resize the trans array to remove unused space */
  1411	      ######        Renew( trie->trans, trie->lasttrans, reg_trie_trans);
  1412			
  1413			    DEBUG_TRIE_COMPILE_r({
  1414			        U32 state;
  1415			        /*
  1416			           Now we print it out again, in a slightly different form as there is additional
  1417			           info we want to be able to see when its compressed. They are close enough for
  1418			           visual comparison though.
  1419			         */
  1420			        PerlIO_printf( Perl_debug_log, "\nChar : %-6s%-6s%-4s ","Match","Base","Ofs" );
  1421			
  1422			        for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
  1423			            SV **tmp = av_fetch( trie->revcharmap, state, 0);
  1424			            if ( tmp ) {
  1425			              PerlIO_printf( Perl_debug_log, "%4.4s ", SvPV_nolen_const( *tmp ) );
  1426			            }
  1427			        }
  1428			        PerlIO_printf( Perl_debug_log, "\n-----:-----------------------");
  1429			
  1430			        for( state = 0 ; state < trie->uniquecharcount ; state++ )
  1431			            PerlIO_printf( Perl_debug_log, "-----");
  1432			        PerlIO_printf( Perl_debug_log, "\n");
  1433			
  1434			        for( state = 1 ; state < trie->laststate ; state++ ) {
  1435				    const U32 base = trie->states[ state ].trans.base;
  1436			
  1437			            PerlIO_printf( Perl_debug_log, "#%04"UVXf" ", (UV)state);
  1438			
  1439			            if ( trie->states[ state ].wordnum ) {
  1440			                PerlIO_printf( Perl_debug_log, " W%04X", trie->states[ state ].wordnum );
  1441			            } else {
  1442			                PerlIO_printf( Perl_debug_log, "%6s", "" );
  1443			            }
  1444			
  1445			            PerlIO_printf( Perl_debug_log, " @%04"UVXf" ", (UV)base );
  1446			
  1447			            if ( base ) {
  1448			                U32 ofs = 0;
  1449			
  1450			                while( ( base + ofs  < trie->uniquecharcount ) ||
  1451			                       ( base + ofs - trie->uniquecharcount < trie->lasttrans
  1452			                         && trie->trans[ base + ofs - trie->uniquecharcount ].check != state))
  1453			                        ofs++;
  1454			
  1455			                PerlIO_printf( Perl_debug_log, "+%02"UVXf"[ ", (UV)ofs);
  1456			
  1457			                for ( ofs = 0 ; ofs < trie->uniquecharcount ; ofs++ ) {
  1458			                    if ( ( base + ofs >= trie->uniquecharcount ) &&
  1459			                         ( base + ofs - trie->uniquecharcount < trie->lasttrans ) &&
  1460			                         trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
  1461			                    {
  1462			                       PerlIO_printf( Perl_debug_log, "%04"UVXf" ",
  1463			                        (UV)trie->trans[ base + ofs - trie->uniquecharcount ].next );
  1464			                    } else {
  1465			                        PerlIO_printf( Perl_debug_log, "%4s ","   0" );
  1466			                    }
  1467			                }
  1468			
  1469			                PerlIO_printf( Perl_debug_log, "]");
  1470			
  1471			            }
  1472			            PerlIO_printf( Perl_debug_log, "\n" );
  1473			        }
  1474	      ######        });
  1475			
  1476			    {
  1477			        /* now finally we "stitch in" the new TRIE node
  1478			           This means we convert either the first branch or the first Exact,
  1479			           depending on whether the thing following (in 'last') is a branch
  1480			           or not and whther first is the startbranch (ie is it a sub part of
  1481			           the alternation or is it the whole thing.)
  1482			           Assuming its a sub part we conver the EXACT otherwise we convert
  1483			           the whole branch sequence, including the first.
  1484			        */
  1485	      ######            regnode *convert;
  1486			
  1487			
  1488			
  1489			
  1490	      ######            if ( first == startbranch && OP( last ) != BRANCH ) {
  1491	      ######                convert = first;
  1492			        } else {
  1493	      ######                convert = NEXTOPER( first );
  1494	      ######                NEXT_OFF( first ) = (U16)(last - first);
  1495			        }
  1496			
  1497	      ######            OP( convert ) = TRIE + (U8)( flags - EXACT );
  1498	      ######            NEXT_OFF( convert ) = (U16)(tail - convert);
  1499	      ######            ARG_SET( convert, data_slot );
  1500			
  1501			        /* tells us if we need to handle accept buffers specially */
  1502	      ######            convert->flags = ( RExC_seen_evals ? 1 : 0 );
  1503			
  1504			
  1505			        /* needed for dumping*/
  1506			        DEBUG_r({
  1507			            regnode *optimize = convert + NODE_STEP_REGNODE + regarglen[ TRIE ];
  1508			            /* We now need to mark all of the space originally used by the
  1509			               branches as optimized away. This keeps the dumpuntil from
  1510			               throwing a wobbly as it doesnt use regnext() to traverse the
  1511			               opcodes.
  1512			             */
  1513			            while( optimize < last ) {
  1514			                OP( optimize ) = OPTIMIZED;
  1515			                optimize++;
  1516			            }
  1517	      ######            });
  1518			    } /* end node insert */
  1519	      ######        return 1;
  1520			}
  1521			
  1522			
  1523			
  1524			/*
  1525			 * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
  1526			 * These need to be revisited when a newer toolchain becomes available.
  1527			 */
  1528			#if defined(__sparc64__) && defined(__GNUC__)
  1529			#   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
  1530			#       undef  SPARC64_GCC_WORKAROUND
  1531			#       define SPARC64_GCC_WORKAROUND 1
  1532			#   endif
  1533			#endif
  1534			
  1535			/* REx optimizer.  Converts nodes into quickier variants "in place".
  1536			   Finds fixed substrings.  */
  1537			
  1538			/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
  1539			   to the position after last scanned or to NULL. */
  1540			
  1541			
  1542			STATIC I32
  1543			S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags, U32 depth)
  1544						/* scanp: Start here (read-write). */
  1545						/* deltap: Write maxlen-minlen here. */
  1546						/* last: Stop before this one. */
  1547	      ######    {
  1548	      ######        I32 min = 0, pars = 0, code;
  1549	      ######        regnode *scan = *scanp, *next;
  1550	      ######        I32 delta = 0;
  1551	      ######        int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
  1552	      ######        int is_inf_internal = 0;		/* The studied chunk is infinite */
  1553	      ######        I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
  1554	      ######        scan_data_t data_fake;
  1555	      ######        struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
  1556	      ######        SV *re_trie_maxbuff = NULL;
  1557			
  1558	      ######        GET_RE_DEBUG_FLAGS_DECL;
  1559			
  1560	      ######        while (scan && OP(scan) != END && scan < last) {
  1561				/* Peephole optimizer: */
  1562				DEBUG_OPTIMISE_r({
  1563				  SV *mysv=sv_newmortal();
  1564				  regprop( mysv, scan);
  1565				  PerlIO_printf(Perl_debug_log, "%*speep: %s (0x%08"UVXf")\n",
  1566				    (int)depth*2, "", SvPV_nolen_const(mysv), PTR2UV(scan));
  1567	      ######    	});
  1568			
  1569	      ######    	if (PL_regkind[(U8)OP(scan)] == EXACT) {
  1570				    /* Merge several consecutive EXACTish nodes into one. */
  1571	      ######    	    regnode *n = regnext(scan);
  1572	      ######    	    U32 stringok = 1;
  1573			#ifdef DEBUGGING
  1574	      ######    	    regnode *stop = scan;
  1575			#endif
  1576			
  1577	      ######    	    next = scan + NODE_SZ_STR(scan);
  1578				    /* Skip NOTHING, merge EXACT*. */
  1579	      ######    	    while (n &&
  1580					   ( PL_regkind[(U8)OP(n)] == NOTHING ||
  1581					     (stringok && (OP(n) == OP(scan))))
  1582					   && NEXT_OFF(n)
  1583					   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
  1584	      ######    		if (OP(n) == TAIL || n > next)
  1585	      ######    		    stringok = 0;
  1586	      ######    		if (PL_regkind[(U8)OP(n)] == NOTHING) {
  1587	      ######    		    NEXT_OFF(scan) += NEXT_OFF(n);
  1588	      ######    		    next = n + NODE_STEP_REGNODE;
  1589			#ifdef DEBUGGING
  1590	      ######    		    if (stringok)
  1591	      ######    			stop = n;
  1592			#endif
  1593	      ######    		    n = regnext(n);
  1594					}
  1595	      ######    		else if (stringok) {
  1596	      ######    		    const int oldl = STR_LEN(scan);
  1597	      ######    		    regnode *nnext = regnext(n);
  1598			
  1599	      ######    		    if (oldl + STR_LEN(n) > U8_MAX)
  1600	      ######    			break;
  1601	      ######    		    NEXT_OFF(scan) += NEXT_OFF(n);
  1602	      ######    		    STR_LEN(scan) += STR_LEN(n);
  1603	      ######    		    next = n + NODE_SZ_STR(n);
  1604					    /* Now we can overwrite *n : */
  1605	      ######    		    Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
  1606			#ifdef DEBUGGING
  1607	      ######    		    stop = next - 1;
  1608			#endif
  1609	      ######    		    n = nnext;
  1610					}
  1611				    }
  1612			
  1613	      ######    	    if (UTF && ( OP(scan) == EXACTF ) && ( STR_LEN(scan) >= 6 ) ) {
  1614			/*
  1615			  Two problematic code points in Unicode casefolding of EXACT nodes:
  1616			
  1617			   U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  1618			   U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  1619			
  1620			   which casefold to
  1621			
  1622			   Unicode			UTF-8
  1623			
  1624			   U+03B9 U+0308 U+0301		0xCE 0xB9 0xCC 0x88 0xCC 0x81
  1625			   U+03C5 U+0308 U+0301		0xCF 0x85 0xCC 0x88 0xCC 0x81
  1626			
  1627			   This means that in case-insensitive matching (or "loose matching",
  1628			   as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
  1629			   length of the above casefolded versions) can match a target string
  1630			   of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
  1631			   This would rather mess up the minimum length computation.
  1632			
  1633			   What we'll do is to look for the tail four bytes, and then peek
  1634			   at the preceding two bytes to see whether we need to decrease
  1635			   the minimum length by four (six minus two).
  1636			
  1637			   Thanks to the design of UTF-8, there cannot be false matches:
  1638			   A sequence of valid UTF-8 bytes cannot be a subsequence of
  1639			   another valid sequence of UTF-8 bytes.
  1640			
  1641			*/
  1642	      ######    		 char *s0 = STRING(scan), *s, *t;
  1643	      ######    		 char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4;
  1644	      ######    		 const char * const t0 = "\xcc\x88\xcc\x81";
  1645	      ######    		 const char * const t1 = t0 + 3;
  1646			
  1647	      ######    		 for (s = s0 + 2;
  1648					      s < s2 && (t = ninstr(s, s1, t0, t1));
  1649					      s = t + 4) {
  1650	      ######    		      if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
  1651						  ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
  1652	      ######    			   min -= 4;
  1653					 }
  1654				    }
  1655			
  1656			#ifdef DEBUGGING
  1657				    /* Allow dumping */
  1658	      ######    	    n = scan + NODE_SZ_STR(scan);
  1659	      ######    	    while (n <= stop) {
  1660	      ######    		if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) {
  1661	      ######    		    OP(n) = OPTIMIZED;
  1662	      ######    		    NEXT_OFF(n) = 0;
  1663					}
  1664	      ######    		n++;
  1665				    }
  1666			#endif
  1667				}
  1668			
  1669			
  1670			
  1671				/* Follow the next-chain of the current node and optimize
  1672				   away all the NOTHINGs from it.  */
  1673	      ######    	if (OP(scan) != CURLYX) {
  1674	      ######    	    const int max = (reg_off_by_arg[OP(scan)]
  1675					       ? I32_MAX
  1676					       /* I32 may be smaller than U16 on CRAYs! */
  1677	      ######    		       : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
  1678	      ######    	    int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
  1679	      ######    	    int noff;
  1680	      ######    	    regnode *n = scan;
  1681				
  1682				    /* Skip NOTHING and LONGJMP. */
  1683	      ######    	    while ((n = regnext(n))
  1684					   && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
  1685					       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
  1686					   && off + noff < max)
  1687	      ######    		off += noff;
  1688	      ######    	    if (reg_off_by_arg[OP(scan)])
  1689	      ######    		ARG(scan) = off;
  1690				    else
  1691	      ######    		NEXT_OFF(scan) = off;
  1692				}
  1693			
  1694				/* The principal pseudo-switch.  Cannot be a switch, since we
  1695				   look into several different things.  */
  1696	      ######    	if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
  1697					   || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
  1698	      ######    	    next = regnext(scan);
  1699	      ######    	    code = OP(scan);
  1700				    /* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
  1701				
  1702	      ######    	    if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
  1703	      ######    		I32 max1 = 0, min1 = I32_MAX, num = 0;
  1704	      ######    		struct regnode_charclass_class accum;
  1705	      ######    		regnode *startbranch=scan;
  1706					
  1707	      ######    		if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
  1708	      ######    		    scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
  1709	      ######    		if (flags & SCF_DO_STCLASS)
  1710	      ######    		    cl_init_zero(pRExC_state, &accum);
  1711			
  1712	      ######    		while (OP(scan) == code) {
  1713	      ######    		    I32 deltanext, minnext, f = 0, fake;
  1714	      ######    		    struct regnode_charclass_class this_class;
  1715			
  1716	      ######    		    num++;
  1717	      ######    		    data_fake.flags = 0;
  1718	      ######    		    if (data) {		
  1719	      ######    			data_fake.whilem_c = data->whilem_c;
  1720	      ######    			data_fake.last_closep = data->last_closep;
  1721					    }
  1722					    else
  1723	      ######    			data_fake.last_closep = &fake;
  1724	      ######    		    next = regnext(scan);
  1725	      ######    		    scan = NEXTOPER(scan);
  1726	      ######    		    if (code != BRANCH)
  1727	      ######    			scan = NEXTOPER(scan);
  1728	      ######    		    if (flags & SCF_DO_STCLASS) {
  1729	      ######    			cl_init(pRExC_state, &this_class);
  1730	      ######    			data_fake.start_class = &this_class;
  1731	      ######    			f = SCF_DO_STCLASS_AND;
  1732					    }		
  1733	      ######    		    if (flags & SCF_WHILEM_VISITED_POS)
  1734	      ######    			f |= SCF_WHILEM_VISITED_POS;
  1735			
  1736					    /* we suppose the run is continuous, last=next...*/
  1737	      ######    		    minnext = study_chunk(pRExC_state, &scan, &deltanext,
  1738								  next, &data_fake, f,depth+1);
  1739	      ######    		    if (min1 > minnext)
  1740	      ######    			min1 = minnext;
  1741	      ######    		    if (max1 < minnext + deltanext)
  1742	      ######    			max1 = minnext + deltanext;
  1743	      ######    		    if (deltanext == I32_MAX)
  1744	      ######    			is_inf = is_inf_internal = 1;
  1745	      ######    		    scan = next;
  1746	      ######    		    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
  1747	      ######    			pars++;
  1748	      ######    		    if (data && (data_fake.flags & SF_HAS_EVAL))
  1749	      ######    			data->flags |= SF_HAS_EVAL;
  1750	      ######    		    if (data)
  1751	      ######    			data->whilem_c = data_fake.whilem_c;
  1752	      ######    		    if (flags & SCF_DO_STCLASS)
  1753	      ######    			cl_or(pRExC_state, &accum, &this_class);
  1754	      ######    		    if (code == SUSPEND)
  1755	      ######    			break;
  1756					}
  1757	      ######    		if (code == IFTHEN && num < 2) /* Empty ELSE branch */
  1758	      ######    		    min1 = 0;
  1759	      ######    		if (flags & SCF_DO_SUBSTR) {
  1760	      ######    		    data->pos_min += min1;
  1761	      ######    		    data->pos_delta += max1 - min1;
  1762	      ######    		    if (max1 != min1 || is_inf)
  1763	      ######    			data->longest = &(data->longest_float);
  1764					}
  1765	      ######    		min += min1;
  1766	      ######    		delta += max1 - min1;
  1767	      ######    		if (flags & SCF_DO_STCLASS_OR) {
  1768	      ######    		    cl_or(pRExC_state, data->start_class, &accum);
  1769	      ######    		    if (min1) {
  1770	      ######    			cl_and(data->start_class, &and_with);
  1771	      ######    			flags &= ~SCF_DO_STCLASS;
  1772					    }
  1773					}
  1774	      ######    		else if (flags & SCF_DO_STCLASS_AND) {
  1775	      ######    		    if (min1) {
  1776	      ######    			cl_and(data->start_class, &accum);
  1777	      ######    			flags &= ~SCF_DO_STCLASS;
  1778					    }
  1779					    else {
  1780						/* Switch to OR mode: cache the old value of
  1781						 * data->start_class */
  1782						StructCopy(data->start_class, &and_with,
  1783	      ######    				   struct regnode_charclass_class);
  1784	      ######    			flags &= ~SCF_DO_STCLASS_AND;
  1785						StructCopy(&accum, data->start_class,
  1786	      ######    				   struct regnode_charclass_class);
  1787	      ######    			flags |= SCF_DO_STCLASS_OR;
  1788	      ######    			data->start_class->flags |= ANYOF_EOS;
  1789					    }
  1790					}
  1791			
  1792					/* demq.
  1793			
  1794					   Assuming this was/is a branch we are dealing with: 'scan' now
  1795					   points at the item that follows the branch sequence, whatever
  1796					   it is. We now start at the beginning of the sequence and look
  1797					   for subsequences of
  1798			
  1799					   BRANCH->EXACT=>X
  1800					   BRANCH->EXACT=>X
  1801			
  1802					   which would be constructed from a pattern like /A|LIST|OF|WORDS/
  1803			
  1804					   If we can find such a subseqence we need to turn the first
  1805					   element into a trie and then add the subsequent branch exact
  1806					   strings to the trie.
  1807			
  1808					   We have two cases
  1809			
  1810					     1. patterns where the whole set of branch can be converted to a trie,
  1811			
  1812					     2. patterns where only a subset of the alternations can be
  1813					     converted to a trie.
  1814			
  1815					   In case 1 we can replace the whole set with a single regop
  1816					   for the trie. In case 2 we need to keep the start and end
  1817					   branchs so
  1818			
  1819					     'BRANCH EXACT; BRANCH EXACT; BRANCH X'
  1820					     becomes BRANCH TRIE; BRANCH X;
  1821			
  1822					   Hypthetically when we know the regex isnt anchored we can
  1823					   turn a case 1 into a DFA and let it rip... Every time it finds a match
  1824					   it would just call its tail, no WHILEM/CURLY needed.
  1825			
  1826					*/
  1827	      ######    		if (DO_TRIE) {
  1828	      ######    		    if (!re_trie_maxbuff) {
  1829	      ######    			re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
  1830	      ######    			if (!SvIOK(re_trie_maxbuff))
  1831	      ######    			    sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
  1832					    }
  1833	      ######                        if ( SvIV(re_trie_maxbuff)>=0 && OP( startbranch )==BRANCH ) {
  1834	      ######                            regnode *cur;
  1835	      ######                            regnode *first = (regnode *)NULL;
  1836	      ######                            regnode *last = (regnode *)NULL;
  1837	      ######                            regnode *tail = scan;
  1838	      ######                            U8 optype = 0;
  1839	      ######                            U32 count=0;
  1840			
  1841			#ifdef DEBUGGING
  1842	      ######                            SV *mysv = sv_newmortal();       /* for dumping */
  1843			#endif
  1844			                        /* var tail is used because there may be a TAIL
  1845			                           regop in the way. Ie, the exacts will point to the
  1846			                           thing following the TAIL, but the last branch will
  1847			                           point at the TAIL. So we advance tail. If we
  1848			                           have nested (?:) we may have to move through several
  1849			                           tails.
  1850			                         */
  1851			
  1852	      ######                            while ( OP( tail ) == TAIL ) {
  1853			                            /* this is the TAIL generated by (?:) */
  1854	      ######                                tail = regnext( tail );
  1855			                        }
  1856			
  1857			                        DEBUG_OPTIMISE_r({
  1858			                            regprop( mysv, tail );
  1859			                            PerlIO_printf( Perl_debug_log, "%*s%s%s%s\n",
  1860			                                (int)depth * 2 + 2, "", "Tail node is:", SvPV_nolen_const( mysv ),
  1861			                                (RExC_seen_evals) ? "[EVAL]" : ""
  1862			                            );
  1863	      ######                            });
  1864			                        /*
  1865			
  1866			                           step through the branches, cur represents each
  1867			                           branch, noper is the first thing to be matched
  1868			                           as part of that branch and noper_next is the
  1869			                           regnext() of that node. if noper is an EXACT
  1870			                           and noper_next is the same as scan (our current
  1871			                           position in the regex) then the EXACT branch is
  1872			                           a possible optimization target. Once we have
  1873			                           two or more consequetive such branches we can
  1874			                           create a trie of the EXACT's contents and stich
  1875			                           it in place. If the sequence represents all of
  1876			                           the branches we eliminate the whole thing and
  1877			                           replace it with a single TRIE. If it is a
  1878			                           subsequence then we need to stitch it in. This
  1879			                           means the first branch has to remain, and needs
  1880			                           to be repointed at the item on the branch chain
  1881			                           following the last branch optimized. This could
  1882			                           be either a BRANCH, in which case the
  1883			                           subsequence is internal, or it could be the
  1884			                           item following the branch sequence in which
  1885			                           case the subsequence is at the end.
  1886			
  1887			                        */
  1888			
  1889			                        /* dont use tail as the end marker for this traverse */
  1890	      ######                            for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
  1891	      ######                                regnode *noper = NEXTOPER( cur );
  1892	      ######                                regnode *noper_next = regnext( noper );
  1893			
  1894			                            DEBUG_OPTIMISE_r({
  1895			                                regprop( mysv, cur);
  1896			                                PerlIO_printf( Perl_debug_log, "%*s%s",
  1897			                                   (int)depth * 2 + 2,"  ", SvPV_nolen_const( mysv ) );
  1898			
  1899			                                regprop( mysv, noper);
  1900			                                PerlIO_printf( Perl_debug_log, " -> %s",
  1901			                                    SvPV_nolen_const(mysv));
  1902			
  1903			                                if ( noper_next ) {
  1904			                                  regprop( mysv, noper_next );
  1905			                                  PerlIO_printf( Perl_debug_log,"\t=> %s\t",
  1906			                                    SvPV_nolen_const(mysv));
  1907			                                }
  1908			                                PerlIO_printf( Perl_debug_log, "0x%p,0x%p,0x%p)\n",
  1909			                                   first, last, cur );
  1910	      ######                                });
  1911	      ######                                if ( ( first ? OP( noper ) == optype
  1912			                                         : PL_regkind[ (U8)OP( noper ) ] == EXACT )
  1913			                                  && noper_next == tail && count<U16_MAX)
  1914			                            {
  1915	      ######                                    count++;
  1916	      ######                                    if ( !first ) {
  1917	      ######                                        first = cur;
  1918	      ######                                        optype = OP( noper );
  1919			                                } else {
  1920			                                    DEBUG_OPTIMISE_r(
  1921			                                        if (!last ) {
  1922			                                            regprop( mysv, first);
  1923			                                            PerlIO_printf( Perl_debug_log, "%*s%s",
  1924			                                              (int)depth * 2 + 2, "F:", SvPV_nolen_const( mysv ) );
  1925			                                            regprop( mysv, NEXTOPER(first) );
  1926			                                            PerlIO_printf( Perl_debug_log, " -> %s\n",
  1927			                                              SvPV_nolen_const( mysv ) );
  1928			                                        }
  1929	      ######                                        );
  1930	      ######                                        last = cur;
  1931			                                    DEBUG_OPTIMISE_r({
  1932			                                        regprop( mysv, cur);
  1933			                                        PerlIO_printf( Perl_debug_log, "%*s%s",
  1934			                                          (int)depth * 2 + 2, "N:", SvPV_nolen_const( mysv ) );
  1935			                                        regprop( mysv, noper );
  1936			                                        PerlIO_printf( Perl_debug_log, " -> %s\n",
  1937			                                          SvPV_nolen_const( mysv ) );
  1938	      ######                                        });
  1939			                                }
  1940			                            } else {
  1941	      ######                                    if ( last ) {
  1942			                                    DEBUG_OPTIMISE_r(
  1943			                                        PerlIO_printf( Perl_debug_log, "%*s%s\n",
  1944			                                            (int)depth * 2 + 2, "E:", "**END**" );
  1945	      ######                                        );
  1946	      ######                                        make_trie( pRExC_state, startbranch, first, cur, tail, optype );
  1947			                                }
  1948	      ######                                    if ( PL_regkind[ (U8)OP( noper ) ] == EXACT
  1949			                                     && noper_next == tail )
  1950			                                {
  1951	      ######                                        count = 1;
  1952	      ######                                        first = cur;
  1953	      ######                                        optype = OP( noper );
  1954			                                } else {
  1955	      ######                                        count = 0;
  1956	      ######                                        first = NULL;
  1957	      ######                                        optype = 0;
  1958			                                }
  1959	      ######                                    last = NULL;
  1960			                            }
  1961			                        }
  1962			                        DEBUG_OPTIMISE_r({
  1963			                            regprop( mysv, cur);
  1964			                            PerlIO_printf( Perl_debug_log,
  1965			                              "%*s%s\t(0x%p,0x%p,0x%p)\n", (int)depth * 2 + 2,
  1966			                              "  ", SvPV_nolen_const( mysv ), first, last, cur);
  1967			
  1968	      ######                            });
  1969	      ######                            if ( last ) {
  1970			                            DEBUG_OPTIMISE_r(
  1971			                                PerlIO_printf( Perl_debug_log, "%*s%s\n",
  1972			                                    (int)depth * 2 + 2, "E:", "==END==" );
  1973	      ######                                );
  1974	      ######                                make_trie( pRExC_state, startbranch, first, scan, tail, optype );
  1975			                        }
  1976			                    }
  1977			                }
  1978				    }
  1979	      ######    	    else if ( code == BRANCHJ ) {  /* single branch is optimized. */
  1980	      ######    		scan = NEXTOPER(NEXTOPER(scan));
  1981				    } else			/* single branch is optimized. */
  1982	      ######    		scan = NEXTOPER(scan);
  1983	      ######    	    continue;
  1984				}
  1985	      ######    	else if (OP(scan) == EXACT) {
  1986	      ######    	    I32 l = STR_LEN(scan);
  1987	      ######    	    UV uc = *((U8*)STRING(scan));
  1988	      ######    	    if (UTF) {
  1989	      ######    		const U8 * const s = (U8*)STRING(scan);
  1990	      ######    		l = utf8_length(s, s + l);
  1991	      ######    		uc = utf8_to_uvchr(s, NULL);
  1992				    }
  1993	      ######    	    min += l;
  1994	      ######    	    if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
  1995					/* The code below prefers earlier match for fixed
  1996					   offset, later match for variable offset.  */
  1997	      ######    		if (data->last_end == -1) { /* Update the start info. */
  1998	      ######    		    data->last_start_min = data->pos_min;
  1999	      ######     		    data->last_start_max = is_inf
  2000			 			? I32_MAX : data->pos_min + data->pos_delta;
  2001					}
  2002	      ######    		sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
  2003					{
  2004	      ######    		    SV * sv = data->last_found;
  2005	      ######    		    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
  2006	      ######    			mg_find(sv, PERL_MAGIC_utf8) : NULL;
  2007	      ######    		    if (mg && mg->mg_len >= 0)
  2008	      ######    			mg->mg_len += utf8_length((U8*)STRING(scan),
  2009									  (U8*)STRING(scan)+STR_LEN(scan));
  2010					}
  2011	      ######    		if (UTF)
  2012	      ######    		    SvUTF8_on(data->last_found);
  2013	      ######    		data->last_end = data->pos_min + l;
  2014	      ######    		data->pos_min += l; /* As in the first entry. */
  2015	      ######    		data->flags &= ~SF_BEFORE_EOL;
  2016				    }
  2017	      ######    	    if (flags & SCF_DO_STCLASS_AND) {
  2018					/* Check whether it is compatible with what we know already! */
  2019	      ######    		int compat = 1;
  2020			
  2021	      ######    		if (uc >= 0x100 ||
  2022					    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
  2023					    && !ANYOF_BITMAP_TEST(data->start_class, uc)
  2024					    && (!(data->start_class->flags & ANYOF_FOLD)
  2025						|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
  2026			                    )
  2027	      ######    		    compat = 0;
  2028	      ######    		ANYOF_CLASS_ZERO(data->start_class);
  2029	      ######    		ANYOF_BITMAP_ZERO(data->start_class);
  2030	      ######    		if (compat)
  2031	      ######    		    ANYOF_BITMAP_SET(data->start_class, uc);
  2032	      ######    		data->start_class->flags &= ~ANYOF_EOS;
  2033	      ######    		if (uc < 0x100)
  2034	      ######    		  data->start_class->flags &= ~ANYOF_UNICODE_ALL;
  2035				    }
  2036	      ######    	    else if (flags & SCF_DO_STCLASS_OR) {
  2037					/* false positive possible if the class is case-folded */
  2038	      ######    		if (uc < 0x100)
  2039	      ######    		    ANYOF_BITMAP_SET(data->start_class, uc);
  2040					else
  2041	      ######    		    data->start_class->flags |= ANYOF_UNICODE_ALL;
  2042	      ######    		data->start_class->flags &= ~ANYOF_EOS;
  2043	      ######    		cl_and(data->start_class, &and_with);
  2044				    }
  2045	      ######    	    flags &= ~SCF_DO_STCLASS;
  2046				}
  2047	      ######    	else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
  2048	      ######    	    I32 l = STR_LEN(scan);
  2049	      ######    	    UV uc = *((U8*)STRING(scan));
  2050			
  2051				    /* Search for fixed substrings supports EXACT only. */
  2052	      ######    	    if (flags & SCF_DO_SUBSTR)
  2053	      ######    		scan_commit(pRExC_state, data);
  2054	      ######    	    if (UTF) {
  2055	      ######    		U8 *s = (U8 *)STRING(scan);
  2056	      ######    		l = utf8_length(s, s + l);
  2057	      ######    		uc = utf8_to_uvchr(s, NULL);
  2058				    }
  2059	      ######    	    min += l;
  2060	      ######    	    if (data && (flags & SCF_DO_SUBSTR))
  2061	      ######    		data->pos_min += l;
  2062	      ######    	    if (flags & SCF_DO_STCLASS_AND) {
  2063					/* Check whether it is compatible with what we know already! */
  2064	      ######    		int compat = 1;
  2065			
  2066	      ######    		if (uc >= 0x100 ||
  2067					    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
  2068					    && !ANYOF_BITMAP_TEST(data->start_class, uc)
  2069					     && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
  2070	      ######    		    compat = 0;
  2071	      ######    		ANYOF_CLASS_ZERO(data->start_class);
  2072	      ######    		ANYOF_BITMAP_ZERO(data->start_class);
  2073	      ######    		if (compat) {
  2074	      ######    		    ANYOF_BITMAP_SET(data->start_class, uc);
  2075	      ######    		    data->start_class->flags &= ~ANYOF_EOS;
  2076	      ######    		    data->start_class->flags |= ANYOF_FOLD;
  2077	      ######    		    if (OP(scan) == EXACTFL)
  2078	      ######    			data->start_class->flags |= ANYOF_LOCALE;
  2079					}
  2080				    }
  2081	      ######    	    else if (flags & SCF_DO_STCLASS_OR) {
  2082	      ######    		if (data->start_class->flags & ANYOF_FOLD) {
  2083					    /* false positive possible if the class is case-folded.
  2084					       Assume that the locale settings are the same... */
  2085	      ######    		    if (uc < 0x100)
  2086	      ######    			ANYOF_BITMAP_SET(data->start_class, uc);
  2087	      ######    		    data->start_class->flags &= ~ANYOF_EOS;
  2088					}
  2089	      ######    		cl_and(data->start_class, &and_with);
  2090				    }
  2091	      ######    	    flags &= ~SCF_DO_STCLASS;
  2092				}
  2093	      ######    	else if (strchr((const char*)PL_varies,OP(scan))) {
  2094	      ######    	    I32 mincount, maxcount, minnext, deltanext, fl = 0;
  2095	      ######    	    I32 f = flags, pos_before = 0;
  2096	      ######    	    regnode *oscan = scan;
  2097	      ######    	    struct regnode_charclass_class this_class;
  2098	      ######    	    struct regnode_charclass_class *oclass = NULL;
  2099	      ######    	    I32 next_is_eval = 0;
  2100			
  2101	      ######    	    switch (PL_regkind[(U8)OP(scan)]) {
  2102				    case WHILEM:		/* End of (?:...)* . */
  2103	      ######    		scan = NEXTOPER(scan);
  2104	      ######    		goto finish;
  2105				    case PLUS:
  2106	      ######    		if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
  2107	      ######    		    next = NEXTOPER(scan);
  2108	      ######    		    if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
  2109	      ######    			mincount = 1;
  2110	      ######    			maxcount = REG_INFTY;
  2111	      ######    			next = regnext(scan);
  2112	      ######    			scan = NEXTOPER(scan);
  2113	      ######    			goto do_curly;
  2114					    }
  2115					}
  2116	      ######    		if (flags & SCF_DO_SUBSTR)
  2117	      ######    		    data->pos_min++;
  2118	      ######    		min++;
  2119					/* Fall through. */
  2120				    case STAR:
  2121	      ######    		if (flags & SCF_DO_STCLASS) {
  2122	      ######    		    mincount = 0;
  2123	      ######    		    maxcount = REG_INFTY;
  2124	      ######    		    next = regnext(scan);
  2125	      ######    		    scan = NEXTOPER(scan);
  2126	      ######    		    goto do_curly;
  2127					}
  2128	      ######    		is_inf = is_inf_internal = 1;
  2129	      ######    		scan = regnext(scan);
  2130	      ######    		if (flags & SCF_DO_SUBSTR) {
  2131	      ######    		    scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
  2132	      ######    		    data->longest = &(data->longest_float);
  2133					}
  2134	      ######    		goto optimize_curly_tail;
  2135				    case CURLY:
  2136	      ######    		mincount = ARG1(scan);
  2137	      ######    		maxcount = ARG2(scan);
  2138	      ######    		next = regnext(scan);
  2139	      ######    		if (OP(scan) == CURLYX) {
  2140	      ######    		    I32 lp = (data ? *(data->last_closep) : 0);
  2141	      ######    		    scan->flags = ((lp <= U8_MAX) ? (U8)lp : U8_MAX);
  2142					}
  2143	      ######    		scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
  2144	      ######    		next_is_eval = (OP(scan) == EVAL);
  2145				      do_curly:
  2146	      ######    		if (flags & SCF_DO_SUBSTR) {
  2147	      ######    		    if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
  2148	      ######    		    pos_before = data->pos_min;
  2149					}
  2150	      ######    		if (data) {
  2151	      ######    		    fl = data->flags;
  2152	      ######    		    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
  2153	      ######    		    if (is_inf)
  2154	      ######    			data->flags |= SF_IS_INF;
  2155					}
  2156	      ######    		if (flags & SCF_DO_STCLASS) {
  2157	      ######    		    cl_init(pRExC_state, &this_class);
  2158	      ######    		    oclass = data->start_class;
  2159	      ######    		    data->start_class = &this_class;
  2160	      ######    		    f |= SCF_DO_STCLASS_AND;
  2161	      ######    		    f &= ~SCF_DO_STCLASS_OR;
  2162					}
  2163					/* These are the cases when once a subexpression
  2164					   fails at a particular position, it cannot succeed
  2165					   even after backtracking at the enclosing scope.
  2166					
  2167					   XXXX what if minimal match and we are at the
  2168					        initial run of {n,m}? */
  2169	      ######    		if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
  2170	      ######    		    f &= ~SCF_WHILEM_VISITED_POS;
  2171			
  2172					/* This will finish on WHILEM, setting scan, or on NULL: */
  2173	      ######    		minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
  2174							      (mincount == 0
  2175								? (f & ~SCF_DO_SUBSTR) : f),depth+1);
  2176			
  2177	      ######    		if (flags & SCF_DO_STCLASS)
  2178	      ######    		    data->start_class = oclass;
  2179	      ######    		if (mincount == 0 || minnext == 0) {
  2180	      ######    		    if (flags & SCF_DO_STCLASS_OR) {
  2181	      ######    			cl_or(pRExC_state, data->start_class, &this_class);
  2182					    }
  2183	      ######    		    else if (flags & SCF_DO_STCLASS_AND) {
  2184						/* Switch to OR mode: cache the old value of
  2185						 * data->start_class */
  2186						StructCopy(data->start_class, &and_with,
  2187	      ######    				   struct regnode_charclass_class);
  2188	      ######    			flags &= ~SCF_DO_STCLASS_AND;
  2189						StructCopy(&this_class, data->start_class,
  2190	      ######    				   struct regnode_charclass_class);
  2191	      ######    			flags |= SCF_DO_STCLASS_OR;
  2192	      ######    			data->start_class->flags |= ANYOF_EOS;
  2193					    }
  2194					} else {		/* Non-zero len */
  2195	      ######    		    if (flags & SCF_DO_STCLASS_OR) {
  2196	      ######    			cl_or(pRExC_state, data->start_class, &this_class);
  2197	      ######    			cl_and(data->start_class, &and_with);
  2198					    }
  2199	      ######    		    else if (flags & SCF_DO_STCLASS_AND)
  2200	      ######    			cl_and(data->start_class, &this_class);
  2201	      ######    		    flags &= ~SCF_DO_STCLASS;
  2202					}
  2203	      ######    		if (!scan) 		/* It was not CURLYX, but CURLY. */
  2204	      ######    		    scan = next;
  2205	      ######    		if (ckWARN(WARN_REGEXP)
  2206					       /* ? quantifier ok, except for (?{ ... }) */
  2207					    && (next_is_eval || !(mincount == 0 && maxcount == 1))
  2208					    && (minnext == 0) && (deltanext == 0)
  2209					    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
  2210					    && maxcount <= REG_INFTY/3) /* Complement check for big count */
  2211					{
  2212					    vWARN(RExC_parse,
  2213	      ######    			  "Quantifier unexpected on zero-length expression");
  2214					}
  2215			
  2216	      ######    		min += minnext * mincount;
  2217	      ######    		is_inf_internal |= ((maxcount == REG_INFTY
  2218							     && (minnext + deltanext) > 0)
  2219							    || deltanext == I32_MAX);
  2220	      ######    		is_inf |= is_inf_internal;
  2221	      ######    		delta += (minnext + deltanext) * maxcount - minnext * mincount;
  2222			
  2223					/* Try powerful optimization CURLYX => CURLYN. */
  2224	      ######    		if (  OP(oscan) == CURLYX && data
  2225					      && data->flags & SF_IN_PAR
  2226					      && !(data->flags & SF_HAS_EVAL)
  2227					      && !deltanext && minnext == 1 ) {
  2228					    /* Try to optimize to CURLYN.  */
  2229	      ######    		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
  2230	      ######    		    regnode *nxt1 = nxt;
  2231			#ifdef DEBUGGING
  2232	      ######    		    regnode *nxt2;
  2233			#endif
  2234			
  2235					    /* Skip open. */
  2236	      ######    		    nxt = regnext(nxt);
  2237	      ######    		    if (!strchr((const char*)PL_simple,OP(nxt))
  2238						&& !(PL_regkind[(U8)OP(nxt)] == EXACT
  2239						     && STR_LEN(nxt) == 1))
  2240	      ######    			goto nogo;
  2241			#ifdef DEBUGGING
  2242	      ######    		    nxt2 = nxt;
  2243			#endif
  2244	      ######    		    nxt = regnext(nxt);
  2245	      ######    		    if (OP(nxt) != CLOSE)
  2246	      ######    			goto nogo;
  2247					    /* Now we know that nxt2 is the only contents: */
  2248	      ######    		    oscan->flags = (U8)ARG(nxt);
  2249	      ######    		    OP(oscan) = CURLYN;
  2250	      ######    		    OP(nxt1) = NOTHING;	/* was OPEN. */
  2251			#ifdef DEBUGGING
  2252	      ######    		    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
  2253	      ######    		    NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
  2254	      ######    		    NEXT_OFF(nxt2) = 0;	/* just for consistancy with CURLY. */
  2255	      ######    		    OP(nxt) = OPTIMIZED;	/* was CLOSE. */
  2256	      ######    		    OP(nxt + 1) = OPTIMIZED; /* was count. */
  2257	      ######    		    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
  2258			#endif
  2259					}
  2260				      nogo:
  2261			
  2262					/* Try optimization CURLYX => CURLYM. */
  2263	      ######    		if (  OP(oscan) == CURLYX && data
  2264					      && !(data->flags & SF_HAS_PAR)
  2265					      && !(data->flags & SF_HAS_EVAL)
  2266					      && !deltanext	/* atom is fixed width */
  2267					      && minnext != 0	/* CURLYM can't handle zero width */
  2268					) {
  2269					    /* XXXX How to optimize if data == 0? */
  2270					    /* Optimize to a simpler form.  */
  2271	      ######    		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
  2272	      ######    		    regnode *nxt2;
  2273			
  2274	      ######    		    OP(oscan) = CURLYM;
  2275	      ######    		    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
  2276						    && (OP(nxt2) != WHILEM))
  2277	      ######    			nxt = nxt2;
  2278	      ######    		    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
  2279					    /* Need to optimize away parenths. */
  2280	      ######    		    if (data->flags & SF_IN_PAR) {
  2281						/* Set the parenth number.  */
  2282	      ######    			regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
  2283			
  2284	      ######    			if (OP(nxt) != CLOSE)
  2285	      ######    			    FAIL("Panic opt close");
  2286	      ######    			oscan->flags = (U8)ARG(nxt);
  2287	      ######    			OP(nxt1) = OPTIMIZED;	/* was OPEN. */
  2288	      ######    			OP(nxt) = OPTIMIZED;	/* was CLOSE. */
  2289			#ifdef DEBUGGING
  2290	      ######    			OP(nxt1 + 1) = OPTIMIZED; /* was count. */
  2291	      ######    			OP(nxt + 1) = OPTIMIZED; /* was count. */
  2292	      ######    			NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
  2293	      ######    			NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
  2294			#endif
  2295			#if 0
  2296						while ( nxt1 && (OP(nxt1) != WHILEM)) {
  2297						    regnode *nnxt = regnext(nxt1);
  2298						
  2299						    if (nnxt == nxt) {
  2300							if (reg_off_by_arg[OP(nxt1)])
  2301							    ARG_SET(nxt1, nxt2 - nxt1);
  2302							else if (nxt2 - nxt1 < U16_MAX)
  2303							    NEXT_OFF(nxt1) = nxt2 - nxt1;
  2304							else
  2305							    OP(nxt) = NOTHING;	/* Cannot beautify */
  2306						    }
  2307						    nxt1 = nnxt;
  2308						}
  2309			#endif
  2310						/* Optimize again: */
  2311	      ######    			study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
  2312							    NULL, 0,depth+1);
  2313					    }
  2314					    else
  2315	      ######    			oscan->flags = 0;
  2316					}
  2317	      ######    		else if ((OP(oscan) == CURLYX)
  2318						 && (flags & SCF_WHILEM_VISITED_POS)
  2319						 /* See the comment on a similar expression above.
  2320						    However, this time it not a subexpression
  2321						    we care about, but the expression itself. */
  2322						 && (maxcount == REG_INFTY)
  2323						 && data && ++data->whilem_c < 16) {
  2324					    /* This stays as CURLYX, we can put the count/of pair. */
  2325					    /* Find WHILEM (as in regexec.c) */
  2326	      ######    		    regnode *nxt = oscan + NEXT_OFF(oscan);
  2327			
  2328	      ######    		    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
  2329	      ######    			nxt += ARG(nxt);
  2330	      ######    		    PREVOPER(nxt)->flags = (U8)(data->whilem_c
  2331						| (RExC_whilem_seen << 4)); /* On WHILEM */
  2332					}
  2333	      ######    		if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
  2334	      ######    		    pars++;
  2335	      ######    		if (flags & SCF_DO_SUBSTR) {
  2336	      ######    		    SV *last_str = Nullsv;
  2337	      ######    		    int counted = mincount != 0;
  2338			
  2339	      ######    		    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
  2340			#if defined(SPARC64_GCC_WORKAROUND)
  2341						I32 b = 0;
  2342						STRLEN l = 0;
  2343						const char *s = NULL;
  2344						I32 old = 0;
  2345			
  2346						if (pos_before >= data->last_start_min)
  2347						    b = pos_before;
  2348						else
  2349						    b = data->last_start_min;
  2350			
  2351						l = 0;
  2352						s = SvPV_const(data->last_found, l);
  2353						old = b - data->last_start_min;
  2354			
  2355			#else
  2356	      ######    			I32 b = pos_before >= data->last_start_min
  2357	      ######    			    ? pos_before : data->last_start_min;
  2358	      ######    			STRLEN l;
  2359	      ######    			const char *s = SvPV_const(data->last_found, l);
  2360	      ######    			I32 old = b - data->last_start_min;
  2361			#endif
  2362			
  2363	      ######    			if (UTF)
  2364	      ######    			    old = utf8_hop((U8*)s, old) - (U8*)s;
  2365						
  2366	      ######    			l -= old;
  2367						/* Get the added string: */
  2368	      ######    			last_str = newSVpvn(s  + old, l);
  2369	      ######    			if (UTF)
  2370	      ######    			    SvUTF8_on(last_str);
  2371	      ######    			if (deltanext == 0 && pos_before == b) {
  2372						    /* What was added is a constant string */
  2373	      ######    			    if (mincount > 1) {
  2374	      ######    				SvGROW(last_str, (mincount * l) + 1);
  2375	      ######    				repeatcpy(SvPVX(last_str) + l,
  2376								  SvPVX_const(last_str), l, mincount - 1);
  2377	      ######    				SvCUR_set(last_str, SvCUR(last_str) * mincount);
  2378							/* Add additional parts. */
  2379							SvCUR_set(data->last_found,
  2380	      ######    					  SvCUR(data->last_found) - l);
  2381	      ######    				sv_catsv(data->last_found, last_str);
  2382							{
  2383	      ######    				    SV * sv = data->last_found;
  2384	      ######    				    MAGIC *mg =
  2385								SvUTF8(sv) && SvMAGICAL(sv) ?
  2386	      ######    					mg_find(sv, PERL_MAGIC_utf8) : NULL;
  2387	      ######    				    if (mg && mg->mg_len >= 0)
  2388	      ######    					mg->mg_len += CHR_SVLEN(last_str);
  2389							}
  2390	      ######    				data->last_end += l * (mincount - 1);
  2391						    }
  2392						} else {
  2393						    /* start offset must point into the last copy */
  2394	      ######    			    data->last_start_min += minnext * (mincount - 1);
  2395	      ######    			    data->last_start_max += is_inf ? I32_MAX
  2396							: (maxcount - 1) * (minnext + data->pos_delta);
  2397						}
  2398					    }
  2399					    /* It is counted once already... */
  2400	      ######    		    data->pos_min += minnext * (mincount - counted);
  2401	      ######    		    data->pos_delta += - counted * deltanext +
  2402						(minnext + deltanext) * maxcount - minnext * mincount;
  2403	      ######    		    if (mincount != maxcount) {
  2404						 /* Cannot extend fixed substrings found inside
  2405						    the group.  */
  2406	      ######    			scan_commit(pRExC_state,data);
  2407	      ######    			if (mincount && last_str) {
  2408	      ######    			    sv_setsv(data->last_found, last_str);
  2409	      ######    			    data->last_end = data->pos_min;
  2410	      ######    			    data->last_start_min =
  2411							data->pos_min - CHR_SVLEN(last_str);
  2412	      ######    			    data->last_start_max = is_inf
  2413							? I32_MAX
  2414							: data->pos_min + data->pos_delta
  2415							- CHR_SVLEN(last_str);
  2416						}
  2417	      ######    			data->longest = &(data->longest_float);
  2418					    }
  2419	      ######    		    SvREFCNT_dec(last_str);
  2420					}
  2421	      ######    		if (data && (fl & SF_HAS_EVAL))
  2422	      ######    		    data->flags |= SF_HAS_EVAL;
  2423				      optimize_curly_tail:
  2424	      ######    		if (OP(oscan) != CURLYX) {
  2425	      ######    		    while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
  2426						   && NEXT_OFF(next))
  2427	      ######    			NEXT_OFF(oscan) += NEXT_OFF(next);
  2428					}
  2429	      ######    		continue;
  2430				    default:			/* REF and CLUMP only? */
  2431	      ######    		if (flags & SCF_DO_SUBSTR) {
  2432	      ######    		    scan_commit(pRExC_state,data);	/* Cannot expect anything... */
  2433	      ######    		    data->longest = &(data->longest_float);
  2434					}
  2435	      ######    		is_inf = is_inf_internal = 1;
  2436	      ######    		if (flags & SCF_DO_STCLASS_OR)
  2437	      ######    		    cl_anything(pRExC_state, data->start_class);
  2438	      ######    		flags &= ~SCF_DO_STCLASS;
  2439	      ######    		break;
  2440				    }
  2441				}
  2442	      ######    	else if (strchr((const char*)PL_simple,OP(scan))) {
  2443	      ######    	    int value = 0;
  2444			
  2445	      ######    	    if (flags & SCF_DO_SUBSTR) {
  2446	      ######    		scan_commit(pRExC_state,data);
  2447	      ######    		data->pos_min++;
  2448				    }
  2449	      ######    	    min++;
  2450	      ######    	    if (flags & SCF_DO_STCLASS) {
  2451	      ######    		data->start_class->flags &= ~ANYOF_EOS;	/* No match on empty */
  2452			
  2453					/* Some of the logic below assumes that switching
  2454					   locale on will only add false positives. */
  2455	      ######    		switch (PL_regkind[(U8)OP(scan)]) {
  2456					case SANY:
  2457					default:
  2458					  do_default:
  2459					    /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
  2460	      ######    		    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
  2461	      ######    			cl_anything(pRExC_state, data->start_class);
  2462	      ######    		    break;
  2463					case REG_ANY:
  2464	      ######    		    if (OP(scan) == SANY)
  2465	      ######    			goto do_default;
  2466	      ######    		    if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
  2467	      ######    			value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
  2468							 || (data->start_class->flags & ANYOF_CLASS));
  2469	      ######    			cl_anything(pRExC_state, data->start_class);
  2470					    }
  2471	      ######    		    if (flags & SCF_DO_STCLASS_AND || !value)
  2472	      ######    			ANYOF_BITMAP_CLEAR(data->start_class,'\n');
  2473	      ######    		    break;
  2474					case ANYOF:
  2475	      ######    		    if (flags & SCF_DO_STCLASS_AND)
  2476	      ######    			cl_and(data->start_class,
  2477						       (struct regnode_charclass_class*)scan);
  2478					    else
  2479	      ######    			cl_or(pRExC_state, data->start_class,
  2480						      (struct regnode_charclass_class*)scan);
  2481	      ######    		    break;
  2482					case ALNUM:
  2483	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2484	      ######    			if (!(data->start_class->flags & ANYOF_LOCALE)) {
  2485	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
  2486	      ######    			    for (value = 0; value < 256; value++)
  2487	      ######    				if (!isALNUM(value))
  2488	      ######    				    ANYOF_BITMAP_CLEAR(data->start_class, value);
  2489						}
  2490					    }
  2491					    else {
  2492	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2493	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
  2494						else {
  2495	      ######    			    for (value = 0; value < 256; value++)
  2496	      ######    				if (isALNUM(value))
  2497	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2498						}
  2499					    }
  2500	      ######    		    break;
  2501					case ALNUML:
  2502	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2503	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2504	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
  2505					    }
  2506					    else {
  2507	      ######    			ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
  2508	      ######    			data->start_class->flags |= ANYOF_LOCALE;
  2509					    }
  2510	      ######    		    break;
  2511					case NALNUM:
  2512	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2513	      ######    			if (!(data->start_class->flags & ANYOF_LOCALE)) {
  2514	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
  2515	      ######    			    for (value = 0; value < 256; value++)
  2516	      ######    				if (isALNUM(value))
  2517	      ######    				    ANYOF_BITMAP_CLEAR(data->start_class, value);
  2518						}
  2519					    }
  2520					    else {
  2521	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2522	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
  2523						else {
  2524	      ######    			    for (value = 0; value < 256; value++)
  2525	      ######    				if (!isALNUM(value))
  2526	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2527						}
  2528					    }
  2529	      ######    		    break;
  2530					case NALNUML:
  2531	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2532	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2533	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
  2534					    }
  2535					    else {
  2536	      ######    			data->start_class->flags |= ANYOF_LOCALE;
  2537	      ######    			ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
  2538					    }
  2539	      ######    		    break;
  2540					case SPACE:
  2541	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2542	      ######    			if (!(data->start_class->flags & ANYOF_LOCALE)) {
  2543	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
  2544	      ######    			    for (value = 0; value < 256; value++)
  2545	      ######    				if (!isSPACE(value))
  2546	      ######    				    ANYOF_BITMAP_CLEAR(data->start_class, value);
  2547						}
  2548					    }
  2549					    else {
  2550	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2551	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
  2552						else {
  2553	      ######    			    for (value = 0; value < 256; value++)
  2554	      ######    				if (isSPACE(value))
  2555	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2556						}
  2557					    }
  2558	      ######    		    break;
  2559					case SPACEL:
  2560	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2561	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2562	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
  2563					    }
  2564					    else {
  2565	      ######    			data->start_class->flags |= ANYOF_LOCALE;
  2566	      ######    			ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
  2567					    }
  2568	      ######    		    break;
  2569					case NSPACE:
  2570	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2571	      ######    			if (!(data->start_class->flags & ANYOF_LOCALE)) {
  2572	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
  2573	      ######    			    for (value = 0; value < 256; value++)
  2574	      ######    				if (isSPACE(value))
  2575	      ######    				    ANYOF_BITMAP_CLEAR(data->start_class, value);
  2576						}
  2577					    }
  2578					    else {
  2579	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2580	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
  2581						else {
  2582	      ######    			    for (value = 0; value < 256; value++)
  2583	      ######    				if (!isSPACE(value))
  2584	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2585						}
  2586					    }
  2587	      ######    		    break;
  2588					case NSPACEL:
  2589	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2590	      ######    			if (data->start_class->flags & ANYOF_LOCALE) {
  2591	      ######    			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
  2592	      ######    			    for (value = 0; value < 256; value++)
  2593	      ######    				if (!isSPACE(value))
  2594	      ######    				    ANYOF_BITMAP_CLEAR(data->start_class, value);
  2595						}
  2596					    }
  2597					    else {
  2598	      ######    			data->start_class->flags |= ANYOF_LOCALE;
  2599	      ######    			ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
  2600					    }
  2601	      ######    		    break;
  2602					case DIGIT:
  2603	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2604	      ######    			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
  2605	      ######    			for (value = 0; value < 256; value++)
  2606	      ######    			    if (!isDIGIT(value))
  2607	      ######    				ANYOF_BITMAP_CLEAR(data->start_class, value);
  2608					    }
  2609					    else {
  2610	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2611	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
  2612						else {
  2613	      ######    			    for (value = 0; value < 256; value++)
  2614	      ######    				if (isDIGIT(value))
  2615	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2616						}
  2617					    }
  2618	      ######    		    break;
  2619					case NDIGIT:
  2620	      ######    		    if (flags & SCF_DO_STCLASS_AND) {
  2621	      ######    			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
  2622	      ######    			for (value = 0; value < 256; value++)
  2623	      ######    			    if (isDIGIT(value))
  2624	      ######    				ANYOF_BITMAP_CLEAR(data->start_class, value);
  2625					    }
  2626					    else {
  2627	      ######    			if (data->start_class->flags & ANYOF_LOCALE)
  2628	      ######    			    ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
  2629						else {
  2630	      ######    			    for (value = 0; value < 256; value++)
  2631	      ######    				if (!isDIGIT(value))
  2632	      ######    				    ANYOF_BITMAP_SET(data->start_class, value);			
  2633						}
  2634					    }
  2635	      ######    		    break;
  2636					}
  2637	      ######    		if (flags & SCF_DO_STCLASS_OR)
  2638	      ######    		    cl_and(data->start_class, &and_with);
  2639	      ######    		flags &= ~SCF_DO_STCLASS;
  2640				    }
  2641				}
  2642	      ######    	else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
  2643	      ######    	    data->flags |= (OP(scan) == MEOL
  2644						    ? SF_BEFORE_MEOL
  2645						    : SF_BEFORE_SEOL);
  2646				}
  2647	      ######    	else if (  PL_regkind[(U8)OP(scan)] == BRANCHJ
  2648					 /* Lookbehind, or need to calculate parens/evals/stclass: */
  2649					   && (scan->flags || data || (fl