     1			/*    toke.c
     2			 *
     3			 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
     4			 *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
     5			 *
     6			 *    You may distribute under the terms of either the GNU General Public
     7			 *    License or the Artistic License, as specified in the README file.
     8			 *
     9			 */
    10			
    11			/*
    12			 *   "It all comes from here, the stench and the peril."  --Frodo
    13			 */
    14			
    15			/*
    16			 * This file is the lexer for Perl.  It's closely linked to the
    17			 * parser, perly.y.
    18			 *
    19			 * The main routine is yylex(), which returns the next token.
    20			 */
    21			
    22			#include "EXTERN.h"
    23			#define PERL_IN_TOKE_C
    24			#include "perl.h"
    25			
    26			#define yychar	(*PL_yycharp)
    27			#define yylval	(*PL_yylvalp)
    28			
    29			static const char ident_too_long[] =
    30			  "Identifier too long";
    31			static const char c_without_g[] =
    32			  "Use of /c modifier is meaningless without /g";
    33			static const char c_in_subst[] =
    34			  "Use of /c modifier is meaningless in s///";
    35			
    36			static void restore_rsfp(pTHX_ void *f);
    37			#ifndef PERL_NO_UTF16_FILTER
    38			static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
    39			static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
    40			#endif
    41			
    42			#define XFAKEBRACK 128
    43			#define XENUMMASK 127
    44			
    45			#ifdef USE_UTF8_SCRIPTS
    46			#   define UTF (!IN_BYTES)
    47			#else
    48			#   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
    49			#endif
    50			
    51			/* In variables named $^X, these are the legal values for X.
    52			 * 1999-02-27 mjd-perl-patch@plover.com */
    53			#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
    54			
    55			/* On MacOS, respect nonbreaking spaces */
    56			#ifdef MACOS_TRADITIONAL
    57			#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
    58			#else
    59			#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
    60			#endif
    61			
    62			/* LEX_* are values for PL_lex_state, the state of the lexer.
    63			 * They are arranged oddly so that the guard on the switch statement
    64			 * can get by with a single comparison (if the compiler is smart enough).
    65			 */
    66			
    67			/* #define LEX_NOTPARSING		11 is done in perl.h. */
    68			
    69			#define LEX_NORMAL		10
    70			#define LEX_INTERPNORMAL	 9
    71			#define LEX_INTERPCASEMOD	 8
    72			#define LEX_INTERPPUSH		 7
    73			#define LEX_INTERPSTART		 6
    74			#define LEX_INTERPEND		 5
    75			#define LEX_INTERPENDMAYBE	 4
    76			#define LEX_INTERPCONCAT	 3
    77			#define LEX_INTERPCONST		 2
    78			#define LEX_FORMLINE		 1
    79			#define LEX_KNOWNEXT		 0
    80			
    81			#ifdef DEBUGGING
    82			static const char* const lex_state_names[] = {
    83			    "KNOWNEXT",
    84			    "FORMLINE",
    85			    "INTERPCONST",
    86			    "INTERPCONCAT",
    87			    "INTERPENDMAYBE",
    88			    "INTERPEND",
    89			    "INTERPSTART",
    90			    "INTERPPUSH",
    91			    "INTERPCASEMOD",
    92			    "INTERPNORMAL",
    93			    "NORMAL"
    94			};
    95			#endif
    96			
    97			#ifdef ff_next
    98			#undef ff_next
    99			#endif
   100			
   101			#include "keywords.h"
   102			
   103			/* CLINE is a macro that ensures PL_copline has a sane value */
   104			
   105			#ifdef CLINE
   106			#undef CLINE
   107			#endif
   108			#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
   109			
   110			/*
   111			 * Convenience functions to return different tokens and prime the
   112			 * lexer for the next token.  They all take an argument.
   113			 *
   114			 * TOKEN        : generic token (used for '(', DOLSHARP, etc)
   115			 * OPERATOR     : generic operator
   116			 * AOPERATOR    : assignment operator
   117			 * PREBLOCK     : beginning the block after an if, while, foreach, ...
   118			 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
   119			 * PREREF       : *EXPR where EXPR is not a simple identifier
   120			 * TERM         : expression term
   121			 * LOOPX        : loop exiting command (goto, last, dump, etc)
   122			 * FTST         : file test operator
   123			 * FUN0         : zero-argument function
   124			 * FUN1         : not used, except for not, which isn't a UNIOP
   125			 * BOop         : bitwise or or xor
   126			 * BAop         : bitwise and
   127			 * SHop         : shift operator
   128			 * PWop         : power operator
   129			 * PMop         : pattern-matching operator
   130			 * Aop          : addition-level operator
   131			 * Mop          : multiplication-level operator
   132			 * Eop          : equality-testing operator
   133			 * Rop          : relational operator <= != gt
   134			 *
   135			 * Also see LOP and lop() below.
   136			 */
   137			
   138			#ifdef DEBUGGING /* Serve -DT. */
   139			#   define REPORT(retval) tokereport(s,(int)retval)
   140			#else
   141			#   define REPORT(retval) (retval)
   142			#endif
   143			
   144			#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
   145			#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
   146			#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
   147			#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
   148			#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
   149			#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
   150			#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
   151			#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
   152			#define FTST(f)  return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
   153			#define FUN0(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
   154			#define FUN1(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
   155			#define BOop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
   156			#define BAop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
   157			#define SHop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
   158			#define PWop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
   159			#define PMop(f)  return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
   160			#define Aop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
   161			#define Mop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
   162			#define Eop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
   163			#define Rop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
   164			
   165			/* This bit of chicanery makes a unary function followed by
   166			 * a parenthesis into a function with one argument, highest precedence.
   167			 * The UNIDOR macro is for unary functions that can be followed by the //
   168			 * operator (such as C<shift // 0>).
   169			 */
   170			#define UNI2(f,x) { \
   171				yylval.ival = f; \
   172				PL_expect = x; \
   173				PL_bufptr = s; \
   174				PL_last_uni = PL_oldbufptr; \
   175				PL_last_lop_op = f; \
   176				if (*s == '(') \
   177				    return REPORT( (int)FUNC1 ); \
   178				s = skipspace(s); \
   179				return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
   180				}
   181			#define UNI(f)    UNI2(f,XTERM)
   182			#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
   183			
   184			#define UNIBRACK(f) { \
   185				yylval.ival = f; \
   186				PL_bufptr = s; \
   187				PL_last_uni = PL_oldbufptr; \
   188				if (*s == '(') \
   189				    return REPORT( (int)FUNC1 ); \
   190				s = skipspace(s); \
   191				return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
   192				}
   193			
   194			/* grandfather return to old style */
   195			#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
   196			
   197			#ifdef DEBUGGING
   198			
   199			/* how to interpret the yylval associated with the token */
   200			enum token_type {
   201			    TOKENTYPE_NONE,
   202			    TOKENTYPE_IVAL,
   203			    TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
   204			    TOKENTYPE_PVAL,
   205			    TOKENTYPE_OPVAL,
   206			    TOKENTYPE_GVVAL
   207			};
   208			
   209			static struct debug_tokens { const int token, type; const char *name; }
   210			  const debug_tokens[] =
   211			{
   212			    { ADDOP,		TOKENTYPE_OPNUM,	"ADDOP" },
   213			    { ANDAND,		TOKENTYPE_NONE,		"ANDAND" },
   214			    { ANDOP,		TOKENTYPE_NONE,		"ANDOP" },
   215			    { ANONSUB,		TOKENTYPE_IVAL,		"ANONSUB" },
   216			    { ARROW,		TOKENTYPE_NONE,		"ARROW" },
   217			    { ASSIGNOP,		TOKENTYPE_OPNUM,	"ASSIGNOP" },
   218			    { BITANDOP,		TOKENTYPE_OPNUM,	"BITANDOP" },
   219			    { BITOROP,		TOKENTYPE_OPNUM,	"BITOROP" },
   220			    { COLONATTR,	TOKENTYPE_NONE,		"COLONATTR" },
   221			    { CONTINUE,		TOKENTYPE_NONE,		"CONTINUE" },
   222			    { DO,		TOKENTYPE_NONE,		"DO" },
   223			    { DOLSHARP,		TOKENTYPE_NONE,		"DOLSHARP" },
   224			    { DORDOR,		TOKENTYPE_NONE,		"DORDOR" },
   225			    { DOROP,		TOKENTYPE_OPNUM,	"DOROP" },
   226			    { DOTDOT,		TOKENTYPE_IVAL,		"DOTDOT" },
   227			    { ELSE,		TOKENTYPE_NONE,		"ELSE" },
   228			    { ELSIF,		TOKENTYPE_IVAL,		"ELSIF" },
   229			    { EQOP,		TOKENTYPE_OPNUM,	"EQOP" },
   230			    { FOR,		TOKENTYPE_IVAL,		"FOR" },
   231			    { FORMAT,		TOKENTYPE_NONE,		"FORMAT" },
   232			    { FUNC,		TOKENTYPE_OPNUM,	"FUNC" },
   233			    { FUNC0,		TOKENTYPE_OPNUM,	"FUNC0" },
   234			    { FUNC0SUB,		TOKENTYPE_OPVAL,	"FUNC0SUB" },
   235			    { FUNC1,		TOKENTYPE_OPNUM,	"FUNC1" },
   236			    { FUNCMETH,		TOKENTYPE_OPVAL,	"FUNCMETH" },
   237			    { HASHBRACK,	TOKENTYPE_NONE,		"HASHBRACK" },
   238			    { IF,		TOKENTYPE_IVAL,		"IF" },
   239			    { LABEL,		TOKENTYPE_PVAL,		"LABEL" },
   240			    { LOCAL,		TOKENTYPE_IVAL,		"LOCAL" },
   241			    { LOOPEX,		TOKENTYPE_OPNUM,	"LOOPEX" },
   242			    { LSTOP,		TOKENTYPE_OPNUM,	"LSTOP" },
   243			    { LSTOPSUB,		TOKENTYPE_OPVAL,	"LSTOPSUB" },
   244			    { MATCHOP,		TOKENTYPE_OPNUM,	"MATCHOP" },
   245			    { METHOD,		TOKENTYPE_OPVAL,	"METHOD" },
   246			    { MULOP,		TOKENTYPE_OPNUM,	"MULOP" },
   247			    { MY,		TOKENTYPE_IVAL,		"MY" },
   248			    { MYSUB,		TOKENTYPE_NONE,		"MYSUB" },
   249			    { NOAMP,		TOKENTYPE_NONE,		"NOAMP" },
   250			    { NOTOP,		TOKENTYPE_NONE,		"NOTOP" },
   251			    { OROP,		TOKENTYPE_IVAL,		"OROP" },
   252			    { OROR,		TOKENTYPE_NONE,		"OROR" },
   253			    { PACKAGE,		TOKENTYPE_NONE,		"PACKAGE" },
   254			    { PMFUNC,		TOKENTYPE_OPVAL,	"PMFUNC" },
   255			    { POSTDEC,		TOKENTYPE_NONE,		"POSTDEC" },
   256			    { POSTINC,		TOKENTYPE_NONE,		"POSTINC" },
   257			    { POWOP,		TOKENTYPE_OPNUM,	"POWOP" },
   258			    { PREDEC,		TOKENTYPE_NONE,		"PREDEC" },
   259			    { PREINC,		TOKENTYPE_NONE,		"PREINC" },
   260			    { PRIVATEREF,	TOKENTYPE_OPVAL,	"PRIVATEREF" },
   261			    { REFGEN,		TOKENTYPE_NONE,		"REFGEN" },
   262			    { RELOP,		TOKENTYPE_OPNUM,	"RELOP" },
   263			    { SHIFTOP,		TOKENTYPE_OPNUM,	"SHIFTOP" },
   264			    { SUB,		TOKENTYPE_NONE,		"SUB" },
   265			    { THING,		TOKENTYPE_OPVAL,	"THING" },
   266			    { UMINUS,		TOKENTYPE_NONE,		"UMINUS" },
   267			    { UNIOP,		TOKENTYPE_OPNUM,	"UNIOP" },
   268			    { UNIOPSUB,		TOKENTYPE_OPVAL,	"UNIOPSUB" },
   269			    { UNLESS,		TOKENTYPE_IVAL,		"UNLESS" },
   270			    { UNTIL,		TOKENTYPE_IVAL,		"UNTIL" },
   271			    { USE,		TOKENTYPE_IVAL,		"USE" },
   272			    { WHILE,		TOKENTYPE_IVAL,		"WHILE" },
   273			    { WORD,		TOKENTYPE_OPVAL,	"WORD" },
   274			    { 0,		TOKENTYPE_NONE,		0 }
   275			};
   276			
   277			/* dump the returned token in rv, plus any optional arg in yylval */
   278			
   279			STATIC int
   280			S_tokereport(pTHX_ const char* s, I32 rv)
   281	    38799749    {
   282	    38799749        if (DEBUG_T_TEST) {
   283	      ######    	const char *name = Nullch;
   284	      ######    	enum token_type type = TOKENTYPE_NONE;
   285	      ######    	const struct debug_tokens *p;
   286	      ######    	SV* const report = newSVpvn("<== ", 4);
   287			
   288	      ######    	for (p = debug_tokens; p->token; p++) {
   289	      ######    	    if (p->token == (int)rv) {
   290	      ######    		name = p->name;
   291	      ######    		type = p->type;
   292	      ######    		break;
   293				    }
   294				}
   295	      ######    	if (name)
   296	      ######    	    Perl_sv_catpv(aTHX_ report, name);
   297	      ######    	else if ((char)rv > ' ' && (char)rv < '~')
   298	      ######    	    Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
   299	      ######    	else if (!rv)
   300	      ######    	    Perl_sv_catpv(aTHX_ report, "EOF");
   301				else
   302	      ######    	    Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
   303	      ######    	switch (type) {
   304				case TOKENTYPE_NONE:
   305				case TOKENTYPE_GVVAL: /* doesn't appear to be used */
   306	      ######    	    break;
   307				case TOKENTYPE_IVAL:
   308	      ######    	    Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
   309	      ######    	    break;
   310				case TOKENTYPE_OPNUM:
   311	      ######    	    Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
   312							    PL_op_name[yylval.ival]);
   313	      ######    	    break;
   314				case TOKENTYPE_PVAL:
   315	      ######    	    Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
   316	      ######    	    break;
   317				case TOKENTYPE_OPVAL:
   318	      ######    	    if (yylval.opval)
   319	      ######    		Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
   320							    PL_op_name[yylval.opval->op_type]);
   321				    else
   322	      ######    		Perl_sv_catpv(aTHX_ report, "(opval=null)");
   323				    break;
   324				}
   325	      ######            Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
   326	      ######            if (s - PL_bufptr > 0)
   327	      ######                sv_catpvn(report, PL_bufptr, s - PL_bufptr);
   328			        else {
   329	      ######                if (PL_oldbufptr && *PL_oldbufptr)
   330	      ######                    sv_catpv(report, PL_tokenbuf);
   331			        }
   332	      ######            PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
   333			    };
   334	    38799749        return (int)rv;
   335			}
   336			
   337			#endif
   338			
   339			/*
   340			 * S_ao
   341			 *
   342			 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
   343			 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
   344			 */
   345			
   346			STATIC int
   347			S_ao(pTHX_ int toketype)
   348	     1092246    {
   349	     1092246        if (*PL_bufptr == '=') {
   350	      119912    	PL_bufptr++;
   351	      119912    	if (toketype == ANDAND)
   352	          21    	    yylval.ival = OP_ANDASSIGN;
   353	      119891    	else if (toketype == OROR)
   354	       25020    	    yylval.ival = OP_ORASSIGN;
   355	       94871    	else if (toketype == DORDOR)
   356	           6    	    yylval.ival = OP_DORASSIGN;
   357	      119912    	toketype = ASSIGNOP;
   358			    }
   359	     1092246        return toketype;
   360			}
   361			
   362			/*
   363			 * S_no_op
   364			 * When Perl expects an operator and finds something else, no_op
   365			 * prints the warning.  It always prints "<something> found where
   366			 * operator expected.  It prints "Missing semicolon on previous line?"
   367			 * if the surprise occurs at the start of the line.  "do you need to
   368			 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
   369			 * where the compiler doesn't know if foo is a method call or a function.
   370			 * It prints "Missing operator before end of line" if there's nothing
   371			 * after the missing operator, or "... before <...>" if there is something
   372			 * after the missing operator.
   373			 */
   374			
   375			STATIC void
   376			S_no_op(pTHX_ const char *what, char *s)
   377	          15    {
   378	          15        char * const oldbp = PL_bufptr;
   379	          15        const bool is_first = (PL_oldbufptr == PL_linestart);
   380			
   381	          15        if (!s)
   382	      ######    	s = oldbp;
   383			    else
   384	          15    	PL_bufptr = s;
   385	          15        yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
   386	          15        if (ckWARN_d(WARN_SYNTAX)) {
   387	          14    	if (is_first)
   388	      ######    	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
   389					    "\t(Missing semicolon on previous line?)\n");
   390	          14    	else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
   391	          13    	    const char *t;
   392	          13    	    for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
   393	          13    	    if (t < PL_bufptr && isSPACE(*t))
   394	           3    		Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
   395						"\t(Do you need to predeclare %.*s?)\n",
   396					    t - PL_oldoldbufptr, PL_oldoldbufptr);
   397				}
   398				else {
   399	           1    	    assert(s >= oldbp);
   400	           1    	    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
   401					    "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
   402				}
   403			    }
   404	          15        PL_bufptr = oldbp;
   405			}
   406			
   407			/*
   408			 * S_missingterm
   409			 * Complain about missing quote/regexp/heredoc terminator.
   410			 * If it's called with (char *)NULL then it cauterizes the line buffer.
   411			 * If we're in a delimited string and the delimiter is a control
   412			 * character, it's reformatted into a two-char sequence like ^C.
   413			 * This is fatal.
   414			 */
   415			
   416			STATIC void
   417			S_missingterm(pTHX_ char *s)
   418	           4    {
   419	           4        char tmpbuf[3];
   420	           4        char q;
   421	           4        if (s) {
   422	      ######    	char * const nl = strrchr(s,'\n');
   423	      ######    	if (nl)
   424	      ######    	    *nl = '\0';
   425			    }
   426	           4        else if (
   427			#ifdef EBCDIC
   428				iscntrl(PL_multi_close)
   429			#else
   430				PL_multi_close < 32 || PL_multi_close == 127
   431			#endif
   432				) {
   433	      ######    	*tmpbuf = '^';
   434	      ######    	tmpbuf[1] = toCTRL(PL_multi_close);
   435	      ######    	tmpbuf[2] = '\0';
   436	      ######    	s = tmpbuf;
   437			    }
   438			    else {
   439	           4    	*tmpbuf = (char)PL_multi_close;
   440	           4    	tmpbuf[1] = '\0';
   441	           4    	s = tmpbuf;
   442			    }
   443	           4        q = strchr(s,'"') ? '\'' : '"';
   444	           4        Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
   445			}
   446			
   447			/*
   448			 * Perl_deprecate
   449			 */
   450			
   451			void
   452			Perl_deprecate(pTHX_ const char *s)
   453	          40    {
   454	          40        if (ckWARN(WARN_DEPRECATED))
   455	           9    	Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
   456			}
   457			
   458			void
   459			Perl_deprecate_old(pTHX_ const char *s)
   460	          25    {
   461			    /* This function should NOT be called for any new deprecated warnings */
   462			    /* Use Perl_deprecate instead                                         */
   463			    /*                                                                    */
   464			    /* It is here to maintain backward compatibility with the pre-5.8     */
   465			    /* warnings category hierarchy. The "deprecated" category used to     */
   466			    /* live under the "syntax" category. It is now a top-level category   */
   467			    /* in its own right.                                                  */
   468			
   469	          25        if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
   470	           6    	Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
   471						"Use of %s is deprecated", s);
   472			}
   473			
   474			/*
   475			 * depcom
   476			 * Deprecate a comma-less variable list.
   477			 */
   478			
   479			STATIC void
   480			S_depcom(pTHX)
   481	           6    {
   482	           6        deprecate_old("comma-less variable list");
   483			}
   484			
   485			/*
   486			 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
   487			 * utf16-to-utf8-reversed.
   488			 */
   489			
   490			#ifdef PERL_CR_FILTER
   491			static void
   492			strip_return(SV *sv)
   493			{
   494			    register const char *s = SvPVX_const(sv);
   495			    register const char * const e = s + SvCUR(sv);
   496			    /* outer loop optimized to do nothing if there are no CR-LFs */
   497			    while (s < e) {
   498				if (*s++ == '\r' && *s == '\n') {
   499				    /* hit a CR-LF, need to copy the rest */
   500				    register char *d = s - 1;
   501				    *d++ = *s++;
   502				    while (s < e) {
   503					if (*s == '\r' && s[1] == '\n')
   504					    s++;
   505					*d++ = *s++;
   506				    }
   507				    SvCUR(sv) -= s - d;
   508				    return;
   509				}
   510			    }
   511			}
   512			
   513			STATIC I32
   514			S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
   515			{
   516			    const I32 count = FILTER_READ(idx+1, sv, maxlen);
   517			    if (count > 0 && !maxlen)
   518				strip_return(sv);
   519			    return count;
   520			}
   521			#endif
   522			
   523			/*
   524			 * Perl_lex_start
   525			 * Initialize variables.  Uses the Perl save_stack to save its state (for
   526			 * recursive calls to the parser).
   527			 */
   528			
   529			void
   530			Perl_lex_start(pTHX_ SV *line)
   531	      106267    {
   532	      106267        const char *s;
   533	      106267        STRLEN len;
   534			
   535	      106267        SAVEI32(PL_lex_dojoin);
   536	      106267        SAVEI32(PL_lex_brackets);
   537	      106267        SAVEI32(PL_lex_casemods);
   538	      106267        SAVEI32(PL_lex_starts);
   539	      106267        SAVEI32(PL_lex_state);
   540	      106267        SAVEVPTR(PL_lex_inpat);
   541	      106267        SAVEI32(PL_lex_inwhat);
   542	      106267        if (PL_lex_state == LEX_KNOWNEXT) {
   543	          23    	I32 toke = PL_nexttoke;
   544	          46    	while (--toke >= 0) {
   545	          23    	    SAVEI32(PL_nexttype[toke]);
   546	          23    	    SAVEVPTR(PL_nextval[toke]);
   547				}
   548	          23    	SAVEI32(PL_nexttoke);
   549			    }
   550	      106267        SAVECOPLINE(PL_curcop);
   551	      106267        SAVEPPTR(PL_bufptr);
   552	      106267        SAVEPPTR(PL_bufend);
   553	      106267        SAVEPPTR(PL_oldbufptr);
   554	      106267        SAVEPPTR(PL_oldoldbufptr);
   555	      106267        SAVEPPTR(PL_last_lop);
   556	      106267        SAVEPPTR(PL_last_uni);
   557	      106267        SAVEPPTR(PL_linestart);
   558	      106267        SAVESPTR(PL_linestr);
   559	      106267        SAVEGENERICPV(PL_lex_brackstack);
   560	      106267        SAVEGENERICPV(PL_lex_casestack);
   561	      106267        SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
   562	      106267        SAVESPTR(PL_lex_stuff);
   563	      106267        SAVEI32(PL_lex_defer);
   564	      106267        SAVEI32(PL_sublex_info.sub_inwhat);
   565	      106267        SAVESPTR(PL_lex_repl);
   566	      106267        SAVEINT(PL_expect);
   567	      106267        SAVEINT(PL_lex_expect);
   568			
   569	      106267        PL_lex_state = LEX_NORMAL;
   570	      106267        PL_lex_defer = 0;
   571	      106267        PL_expect = XSTATE;
   572	      106267        PL_lex_brackets = 0;
   573	      106267        New(899, PL_lex_brackstack, 120, char);
   574	      106267        New(899, PL_lex_casestack, 12, char);
   575	      106267        PL_lex_casemods = 0;
   576	      106267        *PL_lex_casestack = '\0';
   577	      106267        PL_lex_dojoin = 0;
   578	      106267        PL_lex_starts = 0;
   579	      106267        PL_lex_stuff = Nullsv;
   580	      106267        PL_lex_repl = Nullsv;
   581	      106267        PL_lex_inpat = 0;
   582	      106267        PL_nexttoke = 0;
   583	      106267        PL_lex_inwhat = 0;
   584	      106267        PL_sublex_info.sub_inwhat = 0;
   585	      106267        PL_linestr = line;
   586	      106267        if (SvREADONLY(PL_linestr))
   587	        1753    	PL_linestr = sv_2mortal(newSVsv(PL_linestr));
   588	      106267        s = SvPV_const(PL_linestr, len);
   589	      106267        if (!len || s[len-1] != ';') {
   590	       82525    	if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
   591	       45556    	    PL_linestr = sv_2mortal(newSVsv(PL_linestr));
   592	       82525    	sv_catpvn(PL_linestr, "\n;", 2);
   593			    }
   594	      106267        SvTEMP_off(PL_linestr);
   595	      106267        PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
   596	      106267        PL_bufend = PL_bufptr + SvCUR(PL_linestr);
   597	      106267        PL_last_lop = PL_last_uni = Nullch;
   598	      106267        PL_rsfp = 0;
   599			}
   600			
   601			/*
   602			 * Perl_lex_end
   603			 * Finalizer for lexing operations.  Must be called when the parser is
   604			 * done with the lexer.
   605			 */
   606			
   607			void
   608			Perl_lex_end(pTHX)
   609	      100659    {
   610	      100659        PL_doextract = FALSE;
   611			}
   612			
   613			/*
   614			 * S_incline
   615			 * This subroutine has nothing to do with tilting, whether at windmills
   616			 * or pinball tables.  Its name is short for "increment line".  It
   617			 * increments the current line number in CopLINE(PL_curcop) and checks
   618			 * to see whether the line starts with a comment of the form
   619			 *    # line 500 "foo.pm"
   620			 * If so, it sets the current line number and file to the values in the comment.
   621			 */
   622			
   623			STATIC void
   624			S_incline(pTHX_ char *s)
   625	     8682471    {
   626	     8682471        char *t;
   627	     8682471        char *n;
   628	     8682471        char *e;
   629	     8682471        char ch;
   630			
   631	     8682471        CopLINE_inc(PL_curcop);
   632	     8682471        if (*s++ != '#')
   633	     8346618    	return;
   634	      750322        while (SPACE_OR_TAB(*s)) s++;
   635	      335853        if (strnEQ(s, "line", 4))
   636	         819    	s += 4;
   637			    else
   638	         819    	return;
   639	         819        if (SPACE_OR_TAB(*s))
   640	         806    	s++;
   641			    else
   642	         806    	return;
   643	         806        while (SPACE_OR_TAB(*s)) s++;
   644	         806        if (!isDIGIT(*s))
   645	         806    	return;
   646	         806        n = s;
   647	        2049        while (isDIGIT(*s))
   648	        1243    	s++;
   649	        1006        while (SPACE_OR_TAB(*s))
   650	         200    	s++;
   651	         806        if (*s == '"' && (t = strchr(s+1, '"'))) {
   652	         200    	s++;
   653	         200    	e = t + 1;
   654			    }
   655			    else {
   656	         606    	for (t = s; !isSPACE(*t); t++) ;
   657	         606    	e = t;
   658			    }
   659	         806        while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
   660	      ######    	e++;
   661	         806        if (*e != '\n' && *e != '\0')
   662	      ######    	return;		/* false alarm */
   663			
   664	         806        ch = *t;
   665	         806        *t = '\0';
   666	         806        if (t - s > 0) {
   667	         200    	CopFILE_free(PL_curcop);
   668	         200    	CopFILE_set(PL_curcop, s);
   669			    }
   670	         806        *t = ch;
   671	         806        CopLINE_set(PL_curcop, atoi(n)-1);
   672			}
   673			
   674			/*
   675			 * S_skipspace
   676			 * Called to gobble the appropriate amount and type of whitespace.
   677			 * Skips comments as well.
   678			 */
   679			
   680			STATIC char *
   681			S_skipspace(pTHX_ register char *s)
   682	    12968792    {
   683	    12968792        if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
   684	         556    	while (s < PL_bufend && SPACE_OR_TAB(*s))
   685	          63    	    s++;
   686	         493    	return s;
   687			    }
   688	    13174635        for (;;) {
   689	    13174634    	STRLEN prevlen;
   690	    13174634    	SSize_t oldprevlen, oldoldprevlen;
   691	    13174634    	SSize_t oldloplen = 0, oldunilen = 0;
   692	    19417590    	while (s < PL_bufend && isSPACE(*s)) {
   693	     6242956    	    if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
   694	       31760    		incline(s);
   695				}
   696			
   697				/* comment */
   698	    13174634    	if (s < PL_bufend && *s == '#') {
   699	      687852    	    while (s < PL_bufend && *s != '\n')
   700	      671612    		s++;
   701	       16240    	    if (s < PL_bufend) {
   702	       16240    		s++;
   703	       16240    		if (PL_in_eval && !PL_rsfp) {
   704	         178    		    incline(s);
   705	         178    		    continue;
   706					}
   707				    }
   708				}
   709			
   710				/* only continue to recharge the buffer if we're at the end
   711				 * of the buffer, we're not reading from a source filter, and
   712				 * we're in normal lexing mode
   713				 */
   714	    13174456    	if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
   715					PL_lex_state == LEX_FORMLINE)
   716	    12966516    	    return s;
   717			
   718				/* try to recharge the buffer */
   719	      207940    	if ((s = filter_gets(PL_linestr, PL_rsfp,
   720						     (prevlen = SvCUR(PL_linestr)))) == Nullch)
   721				{
   722				    /* end of file.  Add on the -p or -n magic */
   723	        1783    	    if (PL_minus_p) {
   724	      ######    		sv_setpv(PL_linestr,
   725						 ";}continue{print or die qq(-p destination: $!\\n);}");
   726	      ######    		PL_minus_n = PL_minus_p = 0;
   727				    }
   728	        1783    	    else if (PL_minus_n) {
   729	           4    		sv_setpvn(PL_linestr, ";}", 2);
   730	           4    		PL_minus_n = 0;
   731				    }
   732				    else
   733	        1779    		sv_setpvn(PL_linestr,";", 1);
   734			
   735				    /* reset variables for next time we lex */
   736	        1783    	    PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
   737					= SvPVX(PL_linestr);
   738	        1783    	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
   739	        1783    	    PL_last_lop = PL_last_uni = Nullch;
   740			
   741				    /* Close the filehandle.  Could be from -P preprocessor,
   742				     * STDIN, or a regular file.  If we were reading code from
   743				     * STDIN (because the commandline held no -e or filename)
   744				     * then we don't close it, we reset it so the code can
   745				     * read from STDIN too.
   746				     */
   747			
   748	        1783    	    if (PL_preprocess && !PL_in_eval)
   749	      ######    		(void)PerlProc_pclose(PL_rsfp);
   750	        1783    	    else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
   751	      ######    		PerlIO_clearerr(PL_rsfp);
   752				    else
   753	        1783    		(void)PerlIO_close(PL_rsfp);
   754	        1783    	    PL_rsfp = Nullfp;
   755	        1783    	    return s;
   756				}
   757			
   758				/* not at end of file, so we only read another line */
   759				/* make corresponding updates to old pointers, for yyerror() */
   760	      206157    	oldprevlen = PL_oldbufptr - PL_bufend;
   761	      206157    	oldoldprevlen = PL_oldoldbufptr - PL_bufend;
   762	      206157    	if (PL_last_uni)
   763	       23524    	    oldunilen = PL_last_uni - PL_bufend;
   764	      206157    	if (PL_last_lop)
   765	       18980    	    oldloplen = PL_last_lop - PL_bufend;
   766	      206157    	PL_linestart = PL_bufptr = s + prevlen;
   767	      206157    	PL_bufend = s + SvCUR(PL_linestr);
   768	      206157    	s = PL_bufptr;
   769	      206157    	PL_oldbufptr = s + oldprevlen;
   770	      206157    	PL_oldoldbufptr = s + oldoldprevlen;
   771	      206157    	if (PL_last_uni)
   772	       23524    	    PL_last_uni = s + oldunilen;
   773	      206157    	if (PL_last_lop)
   774	       18980    	    PL_last_lop = s + oldloplen;
   775	      206157    	incline(s);
   776			
   777				/* debugger active and we're not compiling the debugger code,
   778				 * so store the line into the debugger's array of lines
   779				 */
   780	      206157    	if (PERLDB_LINE && PL_curstash != PL_debstash) {
   781	           1    	    SV * const sv = NEWSV(85,0);
   782			
   783	           1    	    sv_upgrade(sv, SVt_PVMG);
   784	           1    	    sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
   785	           1                (void)SvIOK_on(sv);
   786	           1                SvIV_set(sv, 0);
   787	           1    	    av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
   788				}
   789			    }
   790			}
   791			
   792			/*
   793			 * S_check_uni
   794			 * Check the unary operators to ensure there's no ambiguity in how they're
   795			 * used.  An ambiguous piece of code would be:
   796			 *     rand + 5
   797			 * This doesn't mean rand() + 5.  Because rand() is a unary operator,
   798			 * the +5 is its argument.
   799			 */
   800			
   801			STATIC void
   802			S_check_uni(pTHX)
   803	       14098    {
   804	       14098        char *s;
   805	       14098        char *t;
   806			
   807	       14098        if (PL_oldoldbufptr != PL_last_uni)
   808	       14059    	return;
   809	          50        while (isSPACE(*PL_last_uni))
   810	          11    	PL_last_uni++;
   811	          39        for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
   812	          39        if ((t = strchr(s, '(')) && t < PL_bufptr)
   813	          33    	return;
   814	           6        if (ckWARN_d(WARN_AMBIGUOUS)){
   815	           5    	const char ch = *s;
   816	           5            *s = '\0';
   817	           5            Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
   818					   "Warning: Use of \"%s\" without parentheses is ambiguous",
   819					   PL_last_uni);
   820	           5            *s = ch;
   821			    }
   822			}
   823			
   824			/*
   825			 * LOP : macro to build a list operator.  Its behaviour has been replaced
   826			 * with a subroutine, S_lop() for which LOP is just another name.
   827			 */
   828			
   829			#define LOP(f,x) return lop(f,x,s)
   830			
   831			/*
   832			 * S_lop
   833			 * Build a list operator (or something that might be one).  The rules:
   834			 *  - if we have a next token, then it's a list operator [why?]
   835			 *  - if the next thing is an opening paren, then it's a function
   836			 *  - else it's a list operator
   837			 */
   838			
   839			STATIC I32
   840			S_lop(pTHX_ I32 f, int x, char *s)
   841	      379099    {
   842	      379099        yylval.ival = f;
   843	      379099        CLINE;
   844	      379099        PL_expect = x;
   845	      379099        PL_bufptr = s;
   846	      379099        PL_last_lop = PL_oldbufptr;
   847	      379099        PL_last_lop_op = (OPCODE)f;
   848	      379099        if (PL_nexttoke)
   849	         224    	return REPORT(LSTOP);
   850	      378875        if (*s == '(')
   851	      144139    	return REPORT(FUNC);
   852	      234736        s = skipspace(s);
   853	      234736        if (*s == '(')
   854	        8428    	return REPORT(FUNC);
   855			    else
   856	      226308    	return REPORT(LSTOP);
   857			}
   858			
   859			/*
   860			 * S_force_next
   861			 * When the lexer realizes it knows the next token (for instance,
   862			 * it is reordering tokens for the parser) then it can call S_force_next
   863			 * to know what token to return the next time the lexer is called.  Caller
   864			 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
   865			 * handles the token correctly.
   866			 */
   867			
   868			STATIC void
   869			S_force_next(pTHX_ I32 type)
   870	     4267093    {
   871	     4267093        PL_nexttype[PL_nexttoke] = type;
   872	     4267093        PL_nexttoke++;
   873	     4267093        if (PL_lex_state != LEX_KNOWNEXT) {
   874	     4056399    	PL_lex_defer = PL_lex_state;
   875	     4056399    	PL_lex_expect = PL_expect;
   876	     4056399    	PL_lex_state = LEX_KNOWNEXT;
   877			    }
   878			}
   879			
   880			STATIC SV *
   881			S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
   882	     1121105    {
   883	     1121105        SV * const sv = newSVpvn(start,len);
   884	     1121105        if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
   885	         117    	SvUTF8_on(sv);
   886	     1121105        return sv;
   887			}
   888			
   889			/*
   890			 * S_force_word
   891			 * When the lexer knows the next thing is a word (for instance, it has
   892			 * just seen -> and it knows that the next char is a word char, then
   893			 * it calls S_force_word to stick the next word into the PL_next lookahead.
   894			 *
   895			 * Arguments:
   896			 *   char *start : buffer position (must be within PL_linestr)
   897			 *   int token   : PL_next will be this type of bare word (e.g., METHOD,WORD)
   898			 *   int check_keyword : if true, Perl checks to make sure the word isn't
   899			 *       a keyword (do this if the word is a label, e.g. goto FOO)
   900			 *   int allow_pack : if true, : characters will also be allowed (require,
   901			 *       use, etc. do this)
   902			 *   int allow_initial_tick : used by the "sub" lexer only.
   903			 */
   904			
   905			STATIC char *
   906			S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
   907	     1085496    {
   908	     1085496        register char *s;
   909	     1085496        STRLEN len;
   910			
   911	     1085496        start = skipspace(start);
   912	     1085496        s = start;
   913	     1085496        if (isIDFIRST_lazy_if(s,UTF) ||
   914				(allow_pack && *s == ':') ||
   915				(allow_initial_tick && *s == '\'') )
   916			    {
   917	     1027203    	s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
   918	     1027203    	if (check_keyword && keyword(PL_tokenbuf, len))
   919	       35085    	    return start;
   920	      992118    	if (token == METHOD) {
   921	      239230    	    s = skipspace(s);
   922	      239230    	    if (*s == '(')
   923	      192290    		PL_expect = XTERM;
   924				    else {
   925	       46940    		PL_expect = XOPERATOR;
   926				    }
   927				}
   928	      992118    	PL_nextval[PL_nexttoke].opval
   929				    = (OP*)newSVOP(OP_CONST,0,
   930						   S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
   931	      992118    	PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
   932	      992118    	force_next(token);
   933			    }
   934	     1050411        return s;
   935			}
   936			
   937			/*
   938			 * S_force_ident
   939			 * Called when the lexer wants $foo *foo &foo etc, but the program
   940			 * text only contains the "foo" portion.  The first argument is a pointer
   941			 * to the "foo", and the second argument is the type symbol to prefix.
   942			 * Forces the next token to be a "WORD".
   943			 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
   944			 */
   945			
   946			STATIC void
   947			S_force_ident(pTHX_ register const char *s, int kind)
   948	      133573    {
   949	      133573        if (s && *s) {
   950	      107587    	OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
   951	      107587    	PL_nextval[PL_nexttoke].opval = o;
   952	      107587    	force_next(WORD);
   953	      107587    	if (kind) {
   954	      107587    	    o->op_private = OPpCONST_ENTERED;
   955				    /* XXX see note in pp_entereval() for why we forgo typo
   956				       warnings if the symbol must be introduced in an eval.
   957				       GSAR 96-10-12 */
   958	      107587    	    gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
   959					kind == '$' ? SVt_PV :
   960					kind == '@' ? SVt_PVAV :
   961					kind == '%' ? SVt_PVHV :
   962						      SVt_PVGV
   963					);
   964				}
   965			    }
   966			}
   967			
   968			NV
   969			Perl_str_to_version(pTHX_ SV *sv)
   970	          26    {
   971	          26        NV retval = 0.0;
   972	          26        NV nshift = 1.0;
   973	          26        STRLEN len;
   974	          26        const char *start = SvPV_const(sv,len);
   975	          26        const char * const end = start + len;
   976	          26        const bool utf = SvUTF8(sv) ? TRUE : FALSE;
   977	          97        while (start < end) {
   978	          71    	STRLEN skip;
   979	          71    	UV n;
   980	          71    	if (utf)
   981	          18    	    n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
   982				else {
   983	          53    	    n = *(U8*)start;
   984	          53    	    skip = 1;
   985				}
   986	          71    	retval += ((NV)n)/nshift;
   987	          71    	start += skip;
   988	          71    	nshift *= 1000;
   989			    }
   990	          26        return retval;
   991			}
   992			
   993			/*
   994			 * S_force_version
   995			 * Forces the next token to be a version number.
   996			 * If the next token appears to be an invalid version number, (e.g. "v2b"),
   997			 * and if "guessing" is TRUE, then no new token is created (and the caller
   998			 * must use an alternative parsing method).
   999			 */
  1000			
  1001			STATIC char *
  1002			S_force_version(pTHX_ char *s, int guessing)
  1003	       75705    {
  1004	       75705        OP *version = Nullop;
  1005	       75705        char *d;
  1006			
  1007	       75705        s = skipspace(s);
  1008			
  1009	       75705        d = s;
  1010	       75705        if (*d == 'v')
  1011	          14    	d++;
  1012	       75705        if (isDIGIT(*d)) {
  1013	       71102    	while (isDIGIT(*d) || *d == '_' || *d == '.')
  1014	       60316    	    d++;
  1015	       10786            if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
  1016	       10776    	    SV *ver;
  1017	       10776                s = scan_num(s, &yylval);
  1018	       10776                version = yylval.opval;
  1019	       10776    	    ver = cSVOPx(version)->op_sv;
  1020	       10776    	    if (SvPOK(ver) && !SvNIOK(ver)) {
  1021	          26    		SvUPGRADE(ver, SVt_PVNV);
  1022	          26    		SvNV_set(ver, str_to_version(ver));
  1023	          26    		SvNOK_on(ver);		/* hint that it is a version */
  1024				    }
  1025			        }
  1026	          10    	else if (guessing)
  1027	      ######    	    return s;
  1028			    }
  1029			
  1030			    /* NOTE: The parser sees the package name and the VERSION swapped */
  1031	       75705        PL_nextval[PL_nexttoke].opval = version;
  1032	       75705        force_next(WORD);
  1033			
  1034	       75705        return s;
  1035			}
  1036			
  1037			/*
  1038			 * S_tokeq
  1039			 * Tokenize a quoted string passed in as an SV.  It finds the next
  1040			 * chunk, up to end of string or a backslash.  It may make a new
  1041			 * SV containing that chunk (if HINT_NEW_STRING is on).  It also
  1042			 * turns \\ into \.
  1043			 */
  1044			
  1045			STATIC SV *
  1046			S_tokeq(pTHX_ SV *sv)
  1047	     1594633    {
  1048	     1594633        register char *s;
  1049	     1594633        register char *send;
  1050	     1594633        register char *d;
  1051	     1594633        STRLEN len = 0;
  1052	     1594633        SV *pv = sv;
  1053			
  1054	     1594633        if (!SvLEN(sv))
  1055	      ######    	goto finish;
  1056			
  1057	     1594633        s = SvPV_force(sv, len);
  1058	     1594633        if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
  1059	       15481    	goto finish;
  1060	     1579152        send = s + len;
  1061	    13635448        while (s < send && *s != '\\')
  1062	    12056296    	s++;
  1063	     1579152        if (s == send)
  1064	     1569368    	goto finish;
  1065	        9784        d = s;
  1066	        9784        if ( PL_hints & HINT_NEW_STRING ) {
  1067	           2    	pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
  1068	           2    	if (SvUTF8(sv))
  1069	      ######    	    SvUTF8_on(pv);
  1070			    }
  1071	      382219        while (s < send) {
  1072	      372435    	if (*s == '\\') {
  1073	       20282    	    if (s + 1 < send && (s[1] == '\\'))
  1074	        5107    		s++;		/* all that, just for this */
  1075				}
  1076	      372435    	*d++ = *s++;
  1077			    }
  1078	        9784        *d = '\0';
  1079	        9784        SvCUR_set(sv, d - SvPVX_const(sv));
  1080			  finish:
  1081	     1594633        if ( PL_hints & HINT_NEW_STRING )
  1082	           6           return new_constant(NULL, 0, "q", sv, pv, "q");
  1083	     1594627        return sv;
  1084			}
  1085			
  1086			/*
  1087			 * Now come three functions related to double-quote context,
  1088			 * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
  1089			 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
  1090			 * interact with PL_lex_state, and create fake ( ... ) argument lists
  1091			 * to handle functions and concatenation.
  1092			 * They assume that whoever calls them will be setting up a fake
  1093			 * join call, because each subthing puts a ',' after it.  This lets
  1094			 *   "lower \luPpEr"
  1095			 * become
  1096			 *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
  1097			 *
  1098			 * (I'm not sure whether the spurious commas at the end of lcfirst's
  1099			 * arguments and join's arguments are created or not).
  1100			 */
  1101			
  1102			/*
  1103			 * S_sublex_start
  1104			 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
  1105			 *
  1106			 * Pattern matching will set PL_lex_op to the pattern-matching op to
  1107			 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
  1108			 *
  1109			 * OP_CONST and OP_READLINE are easy--just make the new op and return.
  1110			 *
  1111			 * Everything else becomes a FUNC.
  1112			 *
  1113			 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
  1114			 * had an OP_CONST or OP_READLINE).  This just sets us up for a
  1115			 * call to S_sublex_push().
  1116			 */
  1117			
  1118			STATIC I32
  1119			S_sublex_start(pTHX)
  1120	     2033243    {
  1121	     2033243        const register I32 op_type = yylval.ival;
  1122			
  1123	     2033243        if (op_type == OP_NULL) {
  1124	        6668    	yylval.opval = PL_lex_op;
  1125	        6668    	PL_lex_op = Nullop;
  1126	        6668    	return THING;
  1127			    }
  1128	     2026575        if (op_type == OP_CONST || op_type == OP_READLINE) {
  1129	     1230534    	SV *sv = tokeq(PL_lex_stuff);
  1130			
  1131	     1230534    	if (SvTYPE(sv) == SVt_PVIV) {
  1132				    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
  1133	     1230531    	    STRLEN len;
  1134	     1230531    	    const char *p = SvPV_const(sv, len);
  1135	     1230531    	    SV * const nsv = newSVpvn(p, len);
  1136	     1230531    	    if (SvUTF8(sv))
  1137	         139    		SvUTF8_on(nsv);
  1138	     1230531    	    SvREFCNT_dec(sv);
  1139	     1230531    	    sv = nsv;
  1140				}
  1141	     1230534    	yylval.opval = (OP*)newSVOP(op_type, 0, sv);
  1142	     1230534    	PL_lex_stuff = Nullsv;
  1143				/* Allow <FH> // "foo" */
  1144	     1230534    	if (op_type == OP_READLINE)
  1145	      ######    	    PL_expect = XTERMORDORDOR;
  1146	     1230534    	return THING;
  1147			    }
  1148			
  1149	      796041        PL_sublex_info.super_state = PL_lex_state;
  1150	      796041        PL_sublex_info.sub_inwhat = op_type;
  1151	      796041        PL_sublex_info.sub_op = PL_lex_op;
  1152	      796041        PL_lex_state = LEX_INTERPPUSH;
  1153			
  1154	      796041        PL_expect = XTERM;
  1155	      796041        if (PL_lex_op) {
  1156	      227077    	yylval.opval = PL_lex_op;
  1157	      227077    	PL_lex_op = Nullop;
  1158	      227077    	return PMFUNC;
  1159			    }
  1160			    else
  1161	      568964    	return FUNC;
  1162			}
  1163			
  1164			/*
  1165			 * S_sublex_push
  1166			 * Create a new scope to save the lexing state.  The scope will be
  1167			 * ended in S_sublex_done.  Returns a '(', starting the function arguments
  1168			 * to the uc, lc, etc. found before.
  1169			 * Sets PL_lex_state to LEX_INTERPCONCAT.
  1170			 */
  1171			
  1172			STATIC I32
  1173			S_sublex_push(pTHX)
  1174	      796041    {
  1175			    dVAR;
  1176	      796041        ENTER;
  1177			
  1178	      796041        PL_lex_state = PL_sublex_info.super_state;
  1179	      796041        SAVEI32(PL_lex_dojoin);
  1180	      796041        SAVEI32(PL_lex_brackets);
  1181	      796041        SAVEI32(PL_lex_casemods);
  1182	      796041        SAVEI32(PL_lex_starts);
  1183	      796041        SAVEI32(PL_lex_state);
  1184	      796041        SAVEVPTR(PL_lex_inpat);
  1185	      796041        SAVEI32(PL_lex_inwhat);
  1186	      796041        SAVECOPLINE(PL_curcop);
  1187	      796041        SAVEPPTR(PL_bufptr);
  1188	      796041        SAVEPPTR(PL_bufend);
  1189	      796041        SAVEPPTR(PL_oldbufptr);
  1190	      796041        SAVEPPTR(PL_oldoldbufptr);
  1191	      796041        SAVEPPTR(PL_last_lop);
  1192	      796041        SAVEPPTR(PL_last_uni);
  1193	      796041        SAVEPPTR(PL_linestart);
  1194	      796041        SAVESPTR(PL_linestr);
  1195	      796041        SAVEGENERICPV(PL_lex_brackstack);
  1196	      796041        SAVEGENERICPV(PL_lex_casestack);
  1197			
  1198	      796041        PL_linestr = PL_lex_stuff;
  1199	      796041        PL_lex_stuff = Nullsv;
  1200			
  1201	      796041        PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
  1202				= SvPVX(PL_linestr);
  1203	      796041        PL_bufend += SvCUR(PL_linestr);
  1204	      796041        PL_last_lop = PL_last_uni = Nullch;
  1205	      796041        SAVEFREESV(PL_linestr);
  1206			
  1207	      796041        PL_lex_dojoin = FALSE;
  1208	      796041        PL_lex_brackets = 0;
  1209	      796041        New(899, PL_lex_brackstack, 120, char);
  1210	      796041        New(899, PL_lex_casestack, 12, char);
  1211	      796041        PL_lex_casemods = 0;
  1212	      796041        *PL_lex_casestack = '\0';
  1213	      796041        PL_lex_starts = 0;
  1214	      796041        PL_lex_state = LEX_INTERPCONCAT;
  1215	      796041        CopLINE_set(PL_curcop, (line_t)PL_multi_start);
  1216			
  1217	      796041        PL_lex_inwhat = PL_sublex_info.sub_inwhat;
  1218	      796041        if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
  1219	      220635    	PL_lex_inpat = PL_sublex_info.sub_op;
  1220			    else
  1221	      575406    	PL_lex_inpat = Nullop;
  1222			
  1223	      796041        return '(';
  1224			}
  1225			
  1226			/*
  1227			 * S_sublex_done
  1228			 * Restores lexer state after a S_sublex_push.
  1229			 */
  1230			
  1231			STATIC I32
  1232			S_sublex_done(pTHX)
  1233	      916316    {
  1234			    dVAR;
  1235	      916316        if (!PL_lex_starts++) {
  1236	       36076    	SV * const sv = newSVpvn("",0);
  1237	       36076    	if (SvUTF8(PL_linestr))
  1238	           1    	    SvUTF8_on(sv);
  1239	       36076    	PL_expect = XOPERATOR;
  1240	       36076    	yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
  1241	       36076    	return THING;
  1242			    }
  1243			
  1244	      880240        if (PL_lex_casemods) {		/* oops, we've got some unbalanced parens */
  1245	         576    	PL_lex_state = LEX_INTERPCASEMOD;
  1246	         576    	return yylex();
  1247			    }
  1248			
  1249			    /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
  1250	      879664        if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
  1251	       83629    	PL_linestr = PL_lex_repl;
  1252	       83629    	PL_lex_inpat = 0;
  1253	       83629    	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
  1254	       83629    	PL_bufend += SvCUR(PL_linestr);
  1255	       83629    	PL_last_lop = PL_last_uni = Nullch;
  1256	       83629    	SAVEFREESV(PL_linestr);
  1257	       83629    	PL_lex_dojoin = FALSE;
  1258	       83629    	PL_lex_brackets = 0;
  1259	       83629    	PL_lex_casemods = 0;
  1260	       83629    	*PL_lex_casestack = '\0';
  1261	       83629    	PL_lex_starts = 0;
  1262	       83629    	if (SvEVALED(PL_lex_repl)) {
  1263	        4461    	    PL_lex_state = LEX_INTERPNORMAL;
  1264	        4461    	    PL_lex_starts++;
  1265				    /*	we don't clear PL_lex_repl here, so that we can check later
  1266					whether this is an evalled subst; that means we rely on the
  1267					logic to ensure sublex_done() is called again only via the
  1268					branch (in yylex()) that clears PL_lex_repl, else we'll loop */
  1269				}
  1270				else {
  1271	       79168    	    PL_lex_state = LEX_INTERPCONCAT;
  1272	       79168    	    PL_lex_repl = Nullsv;
  1273				}
  1274	       83629    	return ',';
  1275			    }
  1276			    else {
  1277	      796035    	LEAVE;
  1278	      796035    	PL_bufend = SvPVX(PL_linestr);
  1279	      796035    	PL_bufend += SvCUR(PL_linestr);
  1280	      796035    	PL_expect = XOPERATOR;
  1281	      796035    	PL_sublex_info.sub_inwhat = 0;
  1282	      796035    	return ')';
  1283			    }
  1284			}
  1285			
  1286			/*
  1287			  scan_const
  1288			
  1289			  Extracts a pattern, double-quoted string, or transliteration.  This
  1290			  is terrifying code.
  1291			
  1292			  It looks at lex_inwhat and PL_lex_inpat to find out whether it's
  1293			  processing a pattern (PL_lex_inpat is true), a transliteration
  1294			  (lex_inwhat & OP_TRANS is true), or a double-quoted string.
  1295			
  1296			  Returns a pointer to the character scanned up to. Iff this is
  1297			  advanced from the start pointer supplied (ie if anything was
  1298			  successfully parsed), will leave an OP for the substring scanned
  1299			  in yylval. Caller must intuit reason for not parsing further
  1300			  by looking at the next characters herself.
  1301			
  1302			  In patterns:
  1303			    backslashes:
  1304			      double-quoted style: \r and \n
  1305			      regexp special ones: \D \s
  1306			      constants: \x3
  1307			      backrefs: \1 (deprecated in substitution replacements)
  1308			      case and quoting: \U \Q \E
  1309			    stops on @ and $, but not for $ as tail anchor
  1310			
  1311			  In transliterations:
  1312			    characters are VERY literal, except for - not at the start or end
  1313			    of the string, which indicates a range.  scan_const expands the
  1314			    range to the full set of intermediate characters.
  1315			
  1316			  In double-quoted strings:
  1317			    backslashes:
  1318			      double-quoted style: \r and \n
  1319			      constants: \x3
  1320			      backrefs: \1 (deprecated)
  1321			      case and quoting: \U \Q \E
  1322			    stops on @ and $
  1323			
  1324			  scan_const does *not* construct ops to handle interpolated strings.
  1325			  It stops processing as soon as it finds an embedded $ or @ variable
  1326			  and leaves it to the caller to work out what's going on.
  1327			
  1328			  @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
  1329			
  1330			  $ in pattern could be $foo or could be tail anchor.  Assumption:
  1331			  it's a tail anchor if $ is the last thing in the string, or if it's
  1332			  followed by one of ")| \n\t"
  1333			
  1334			  \1 (backreferences) are turned into $1
  1335			
  1336			  The structure of the code is
  1337			      while (there's a character to process) {
  1338			          handle transliteration ranges
  1339				  skip regexp comments
  1340				  skip # initiated comments in //x patterns
  1341				  check for embedded @foo
  1342				  check for embedded scalars
  1343				  if (backslash) {
  1344				      leave intact backslashes from leave (below)
  1345				      deprecate \1 in strings and sub replacements
  1346				      handle string-changing backslashes \l \U \Q \E, etc.
  1347				      switch (what was escaped) {
  1348				          handle - in a transliteration (becomes a literal -)
  1349					  handle \132 octal characters
  1350					  handle 0x15 hex characters
  1351					  handle \cV (control V)
  1352					  handle printf backslashes (\f, \r, \n, etc)
  1353				      } (end switch)
  1354				  } (end if backslash)
  1355			    } (end while character to read)
  1356					
  1357			*/
  1358			
  1359			STATIC char *
  1360			S_scan_const(pTHX_ char *start)
  1361	     1154591    {
  1362	     1154591        register char *send = PL_bufend;		/* end of the constant */
  1363	     1154591        SV *sv = NEWSV(93, send - start);		/* sv for the constant */
  1364	     1154591        register char *s = start;			/* start of the constant */
  1365	     1154591        register char *d = SvPVX(sv);		/* destination for copies */
  1366	     1154591        bool dorange = FALSE;			/* are we in a translit range? */
  1367	     1154591        bool didrange = FALSE;		        /* did we just finish a range? */
  1368	     1154591        I32  has_utf8 = FALSE;			/* Output constant is UTF8 */
  1369	     1154591        I32  this_utf8 = UTF;			/* The source string is assumed to be UTF8 */
  1370	     1154591        UV uv;
  1371			
  1372	     1154591        const char *leaveit =	/* set of acceptably-backslashed characters */
  1373				PL_lex_inpat
  1374				    ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
  1375	     1154591    	    : "";
  1376			
  1377	     1154591        if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
  1378				/* If we are doing a trans and we know we want UTF8 set expectation */
  1379	        9810    	has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
  1380	        9810    	this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
  1381			    }
  1382			
  1383			
  1384	    11676816        while (s < send || dorange) {
  1385			        /* get transliterations out of the way (they're most literal) */
  1386	    10965678    	if (PL_lex_inwhat == OP_TRANS) {
  1387				    /* expand a range A-Z to the full set of characters.  AIE! */
  1388	       28761    	    if (dorange) {
  1389	        4946    		I32 i;				/* current expanded character */
  1390	        4946    		I32 min;			/* first character in range */
  1391	        4946    		I32 max;			/* last character in range */
  1392			
  1393	        4946    		if (has_utf8) {
  1394	           3    		    char * const c = (char*)utf8_hop((U8*)d, -1);
  1395	           3    		    char *e = d++;
  1396	           9    		    while (e-- > c)
  1397	           6    			*(e + 1) = *e;
  1398	           3    		    *c = (char)UTF_TO_NATIVE(0xff);
  1399					    /* mark the range as done, and continue */
  1400	           3    		    dorange = FALSE;
  1401	           3    		    didrange = TRUE;
  1402	           3    		    continue;
  1403					}
  1404			
  1405	        4943    		i = d - SvPVX_const(sv);		/* remember current offset */
  1406	        4943    		SvGROW(sv, SvLEN(sv) + 256);	/* never more than 256 chars in a range */
  1407	        4943    		d = SvPVX(sv) + i;		/* refresh d after realloc */
  1408	        4943    		d -= 2;				/* eat the first char and the - */
  1409			
  1410	        4943    		min = (U8)*d;			/* first char in range */
  1411	        4943    		max = (U8)d[1];			/* last char in range  */
  1412			
  1413	        4943                    if (min > max) {
  1414	           1    		    Perl_croak(aTHX_
  1415						       "Invalid range \"%c-%c\" in transliteration operator",
  1416						       (char)min, (char)max);
  1417			                }
  1418			
  1419			#ifdef EBCDIC
  1420					if ((isLOWER(min) && isLOWER(max)) ||
  1421					    (isUPPER(min) && isUPPER(max))) {
  1422					    if (isLOWER(min)) {
  1423						for (i = min; i <= max; i++)
  1424						    if (isLOWER(i))
  1425							*d++ = NATIVE_TO_NEED(has_utf8,i);
  1426					    } else {
  1427						for (i = min; i <= max; i++)
  1428						    if (isUPPER(i))
  1429							*d++ = NATIVE_TO_NEED(has_utf8,i);
  1430					    }
  1431					}
  1432					else
  1433			#endif
  1434	      158217    		    for (i = min; i <= max; i++)
  1435	      153275    			*d++ = (char)i;
  1436			
  1437					/* mark the range as done, and continue */
  1438	        4942    		dorange = FALSE;
  1439	        4942    		didrange = TRUE;
  1440	        4942    		continue;
  1441				    }
  1442			
  1443				    /* range begins (ignore - as first or last char) */
  1444	       23815    	    else if (*s == '-' && s+1 < send  && s != start) {
  1445	        4988    		if (didrange) {
  1446	           1    		    Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
  1447					}
  1448	        4987    		if (has_utf8) {
  1449	          41    		    *d++ = (char)UTF_TO_NATIVE(0xff);	/* use illegal utf8 byte--see pmtrans */
  1450	          41    		    s++;
  1451	          41    		    continue;
  1452					}
  1453	        4946    		dorange = TRUE;
  1454	        4946    		s++;
  1455				    }
  1456				    else {
  1457	       18827    		didrange = FALSE;
  1458				    }
  1459				}
  1460			
  1461				/* if we get here, we're not doing a transliteration */
  1462			
  1463				/* skip for regexp comments /(?#comment)/ and code /(?{code})/,
  1464				   except for the last char, which will be done separately. */
  1465	    10936917    	else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
  1466	       18789    	    if (s[2] == '#') {
  1467	          15    		while (s+1 < send && *s != ')')
  1468	          10    		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
  1469				    }
  1470	       18784    	    else if (s[2] == '{' /* This should match regcomp.c */
  1471					     || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
  1472				    {
  1473	         662    		I32 count = 1;
  1474	         662    		char *regparse = s + (s[2] == '{' ? 3 : 4);
  1475	        7200    		char c;
  1476			
  1477	        7200    		while (count && (c = *regparse)) {
  1478	        6538    		    if (c == '\\' && regparse[1])
  1479	           1    			regparse++;
  1480	        6537    		    else if (c == '{')
  1481	           3    			count++;
  1482	        6534    		    else if (c == '}')
  1483	         663    			count--;
  1484	        6538    		    regparse++;
  1485					}
  1486	         662    		if (*regparse != ')')
  1487	           2    		    regparse--;		/* Leave one char for continuation. */
  1488	        9755    		while (s < regparse)
  1489	        9093    		    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
  1490				    }
  1491				}
  1492			
  1493				/* likewise skip #-initiated comments in //x patterns */
  1494	    10918128    	else if (*s == '#' && PL_lex_inpat &&
  1495				  ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
  1496	       42705    	    while (s+1 < send && *s != '\n')
  1497	       40939    		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
  1498				}
  1499			
  1500				/* check for embedded arrays
  1501				   (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
  1502				   */
  1503	    10916362    	else if (*s == '@' && s[1]
  1504					 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
  1505	    10903538    	    break;
  1506			
  1507				/* check for embedded scalars.  only stop if we're sure it's a
  1508				   variable.
  1509			        */
  1510	    10903538    	else if (*s == '$') {
  1511	      471072    	    if (!PL_lex_inpat)	/* not a regexp, so $ must be var */
  1512	      403487    		break;
  1513	       67585    	    if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
  1514	       20956    		break;		/* in regexp, $ might be tail anchor */
  1515				}
  1516			
  1517				/* End of else if chain - OP_TRANS rejoin rest */
  1518			
  1519				/* backslashes */
  1520	    10523423    	if (*s == '\\' && s+1 < send) {
  1521	     3016662    	    s++;
  1522			
  1523				    /* some backslashes we leave behind */
  1524	     3016662    	    if (*leaveit && *s && strchr(leaveit, *s)) {
  1525	      188617    		*d++ = NATIVE_TO_NEED(has_utf8,'\\');
  1526	      188617    		*d++ = NATIVE_TO_NEED(has_utf8,*s++);
  1527	      188617    		continue;
  1528				    }
  1529			
  1530				    /* deprecate \1 in strings and substitution replacements */
  1531	     2828045    	    if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
  1532					isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
  1533				    {
  1534	           2    		if (ckWARN(WARN_SYNTAX))
  1535	           1    		    Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
  1536	           2    		*--s = '$';
  1537	           2    		break;
  1538				    }
  1539			
  1540				    /* string-change backslash escapes */
  1541	     2828043    	    if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
  1542	        6180    		--s;
  1543	        6180    		break;
  1544				    }
  1545			
  1546				    /* if we get here, it's either a quoted -, or a digit */
  1547	     2821863    	    switch (*s) {
  1548			
  1549				    /* quoted - in transliterations */
  1550				    case '-':
  1551	         431    		if (PL_lex_inwhat == OP_TRANS) {
  1552	         130    		    *d++ = *s++;
  1553	         130    		    continue;
  1554					}
  1555					/* FALL THROUGH */
  1556				    default:
  1557				        {
  1558	       83044    		    if (ckWARN(WARN_MISC) &&
  1559						isALNUM(*s) &&
  1560						*s != '_')
  1561	           2    			Perl_warner(aTHX_ packWARN(WARN_MISC),
  1562						       "Unrecognized escape \\%c passed through",
  1563						       *s);
  1564					    /* default action is to copy the quoted character */
  1565	           2    		    goto default_action;
  1566					}
  1567			
  1568				    /* \132 indicates an octal constant */
  1569				    case '0': case '1': case '2': case '3':
  1570				    case '4': case '5': case '6': case '7':
  1571					{
  1572	       30645                        I32 flags = 0;
  1573	       30645                        STRLEN len = 3;
  1574	       30645    		    uv = grok_oct(s, &len, &flags, NULL);
  1575	       30645    		    s += len;
  1576					}
  1577	       30645    		goto NUM_ESCAPE_INSERT;
  1578			
  1579				    /* \x24 indicates a hex constant */
  1580				    case 'x':
  1581	     2577790    		++s;
  1582	     2577790    		if (*s == '{') {
  1583	        6113    		    char* const e = strchr(s, '}');
  1584	        6113                        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
  1585	        6113                          PERL_SCAN_DISALLOW_PREFIX;
  1586	        6113    		    STRLEN len;
  1587			
  1588	        6113                        ++s;
  1589	        6113    		    if (!e) {
  1590	           1    			yyerror("Missing right brace on \\x{}");
  1591	           1    			continue;
  1592					    }
  1593	        6112                        len = e - s;
  1594	        6112    		    uv = grok_hex(s, &len, &flags, NULL);
  1595	        6112    		    s = e + 1;
  1596					}
  1597					else {
  1598					    {
  1599	     2571677    			STRLEN len = 2;
  1600	     2571677                            I32 flags = PERL_SCAN_DISALLOW_PREFIX;
  1601	     2571677    			uv = grok_hex(s, &len, &flags, NULL);
  1602	     2571677    			s += len;
  1603					    }
  1604					}
  1605			
  1606				      NUM_ESCAPE_INSERT:
  1607					/* Insert oct or hex escaped character.
  1608					 * There will always enough room in sv since such
  1609					 * escapes will be longer than any UTF-8 sequence
  1610					 * they can end up as. */
  1611					
  1612					/* We need to map to chars to ASCII before doing the tests
  1613					   to cover EBCDIC
  1614					*/
  1615	     2608435    		if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
  1616	       76594    		    if (!has_utf8 && uv > 255) {
  1617					        /* Might need to recode whatever we have
  1618						 * accumulated so far if it contains any
  1619						 * hibit chars.
  1620						 *
  1621						 * (Can't we keep track of that and avoid
  1622						 *  this rescan? --jhi)
  1623						 */
  1624	        3248    			int hicount = 0;
  1625	        3248    			U8 *c;
  1626	        6116    			for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
  1627	        2868    			    if (!NATIVE_IS_INVARIANT(*c)) {
  1628	         325    			        hicount++;
  1629						    }
  1630						}
  1631	        3248    			if (hicount) {
  1632	          46    			    const STRLEN offset = d - SvPVX_const(sv);
  1633	          46    			    U8 *src, *dst;
  1634	          46    			    d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
  1635	          46    			    src = (U8 *)d - 1;
  1636	          46    			    dst = src+hicount;
  1637	          46    			    d  += hicount;
  1638	         651    			    while (src >= (const U8 *)SvPVX_const(sv)) {
  1639	         605    			        if (!NATIVE_IS_INVARIANT(*src)) {
  1640	         325    				    const U8 ch = NATIVE_TO_ASCII(*src);
  1641	         325    				    *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
  1642	         325    				    *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
  1643						        }
  1644						        else {
  1645	         280    				    *dst-- = *src;
  1646						        }
  1647	         605    				src--;
  1648						    }
  1649			                        }
  1650			                    }
  1651			
  1652	       76594                        if (has_utf8 || uv > 255) {
  1653	        5557    		        d = (char*)uvchr_to_utf8((U8*)d, uv);
  1654	        5557    			has_utf8 = TRUE;
  1655	        5557    			if (PL_lex_inwhat == OP_TRANS &&
  1656						    PL_sublex_info.sub_op) {
  1657	          53    			    PL_sublex_info.sub_op->op_private |=
  1658							(PL_lex_repl ? OPpTRANS_FROM_UTF
  1659								     : OPpTRANS_TO_UTF);
  1660						}
  1661			                    }
  1662					    else {
  1663	       71037    		        *d++ = (char)uv;
  1664					    }
  1665					}
  1666					else {
  1667	     2531841    		    *d++ = (char) uv;
  1668					}
  1669	     2531841    		continue;
  1670			
  1671			 	    /* \N{LATIN SMALL LETTER A} is a named character */
  1672			 	    case 'N':
  1673	         156     		++s;
  1674	         156     		if (*s == '{') {
  1675	         156     		    char* e = strchr(s, '}');
  1676	         156     		    SV *res;
  1677	         156     		    STRLEN len;
  1678	         156     		    const char *str;
  1679			
  1680	         156     		    if (!e) {
  1681	      ######    			yyerror("Missing right brace on \\N{}");
  1682	      ######    			e = s - 1;
  1683	      ######    			goto cont_scan;
  1684					    }
  1685	         156    		    if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
  1686					        /* \N{U+...} */
  1687	           1    		        I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
  1688	           1    			  PERL_SCAN_DISALLOW_PREFIX;
  1689	           1    		        s += 3;
  1690	           1    			len = e - s;
  1691	           1    			uv = grok_hex(s, &len, &flags, NULL);
  1692	           1    			s = e + 1;
  1693	           1    			goto NUM_ESCAPE_INSERT;
  1694					    }
  1695	         155    		    res = newSVpvn(s + 1, e - s - 1);
  1696	         155    		    res = new_constant( Nullch, 0, "charnames",
  1697								res, Nullsv, "\\N{...}" );
  1698	         153    		    if (has_utf8)
  1699	          21    			sv_utf8_upgrade(res);
  1700	         153    		    str = SvPV_const(res,len);
  1701			#ifdef EBCDIC_NEVER_MIND
  1702					    /* charnames uses pack U and that has been
  1703					     * recently changed to do the below uni->native
  1704					     * mapping, so this would be redundant (and wrong,
  1705					     * the code point would be doubly converted).
  1706					     * But leave this in just in case the pack U change
  1707					     * gets revoked, but the semantics is still
  1708					     * desireable for charnames. --jhi */
  1709					    {
  1710						 UV uv = utf8_to_uvchr((const U8*)str, 0);
  1711			
  1712						 if (uv < 0x100) {
  1713						      U8 tmpbuf[UTF8_MAXBYTES+1], *d;
  1714			
  1715						      d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
  1716						      sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
  1717						      str = SvPV_const(res, len);
  1718						 }
  1719					    }
  1720			#endif
  1721	         153    		    if (!has_utf8 && SvUTF8(res)) {
  1722	         132    			const char * const ostart = SvPVX_const(sv);
  1723	         132    			SvCUR_set(sv, d - ostart);
  1724	         132    			SvPOK_on(sv);
  1725	         132    			*d = '\0';
  1726	         132    			sv_utf8_upgrade(sv);
  1727						/* this just broke our allocation above... */
  1728	         132    			SvGROW(sv, (STRLEN)(send - start));
  1729	         132    			d = SvPVX(sv) + SvCUR(sv);
  1730	         132    			has_utf8 = TRUE;
  1731					    }
  1732	         153    		    if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
  1733	      ######    			const char * const odest = SvPVX_const(sv);
  1734			
  1735	      ######    			SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
  1736	      ######    			d = SvPVX(sv) + (d - odest);
  1737					    }
  1738	         153    		    Copy(str, d, len, char);
  1739	         153    		    d += len;
  1740	         153    		    SvREFCNT_dec(res);
  1741					  cont_scan:
  1742	         153    		    s = e + 1;
  1743					}
  1744					else
  1745	      ######    		    yyerror("Missing braces on \\N{}");
  1746	      ######    		continue;
  1747			
  1748				    /* \c is a control character */
  1749				    case 'c':
  1750	        3663    		s++;
  1751	        3663    		if (s < send) {
  1752	        3662    		    U8 c = *s++;
  1753			#ifdef EBCDIC
  1754					    if (isLOWER(c))
  1755						c = toUPPER(c);
  1756			#endif
  1757	        3662    		    *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
  1758					}
  1759					else {
  1760	           1    		    yyerror("Missing control char name in \\c");
  1761					}
  1762	           1    		continue;
  1763			
  1764				    /* printf-style backslashes, formfeeds, newlines, etc */
  1765				    case 'b':
  1766	         121    		*d++ = NATIVE_TO_NEED(has_utf8,'\b');
  1767	         121    		break;
  1768				    case 'n':
  1769	      112999    		*d++ = NATIVE_TO_NEED(has_utf8,'\n');
  1770	      112999    		break;
  1771				    case 'r':
  1772	         490    		*d++ = NATIVE_TO_NEED(has_utf8,'\r');
  1773	         490    		break;
  1774				    case 'f':
  1775	         219    		*d++ = NATIVE_TO_NEED(has_utf8,'\f');
  1776	         219    		break;
  1777				    case 't':
  1778	       11444    		*d++ = NATIVE_TO_NEED(has_utf8,'\t');
  1779	       11444    		break;
  1780				    case 'e':
  1781	         316    		*d++ = ASCII_TO_NEED(has_utf8,'\033');
  1782	         316    		break;
  1783				    case 'a':
  1784	         846    		*d++ = ASCII_TO_NEED(has_utf8,'\007');
  1785	      126435    		break;
  1786				    } /* end switch */
  1787			
  1788	      126435    	    s++;
  1789	      126435    	    continue;
  1790				} /* end if (backslash) */
  1791			
  1792			    default_action:
  1793				/* If we started with encoded form, or already know we want it
  1794				   and then encode the next character */
  1795	     7589805    	if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
  1796	         298    	    STRLEN len  = 1;
  1797	         298    	    const UV uv       = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
  1798	         298    	    const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
  1799	         298    	    s += len;
  1800	         298    	    if (need > len) {
  1801					/* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
  1802	           2    		const STRLEN off = d - SvPVX_const(sv);
  1803	           2    		d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
  1804				    }
  1805	         298    	    d = (char*)uvchr_to_utf8((U8*)d, uv);
  1806	         298    	    has_utf8 = TRUE;
  1807				}
  1808				else {
  1809	     7589507    	    *d++ = NATIVE_TO_NEED(has_utf8,*s++);
  1810				}
  1811			    } /* while loop to process each character */
  1812			
  1813			    /* terminate the string and set up the sv */
  1814	     1154587        *d = '\0';
  1815	     1154587        SvCUR_set(sv, d - SvPVX_const(sv));
  1816	     1154587        if (SvCUR(sv) >= SvLEN(sv))
  1817	      ######    	Perl_croak(aTHX_ "panic: constant overflowed allocated space");
  1818			
  1819	     1154587        SvPOK_on(sv);
  1820	     1154587        if (PL_encoding && !has_utf8) {
  1821	         136    	sv_recode_to_utf8(sv, PL_encoding);
  1822	         136    	if (SvUTF8(sv))
  1823	         136    	    has_utf8 = TRUE;
  1824			    }
  1825	     1154587        if (has_utf8) {
  1826	        3657    	SvUTF8_on(sv);
  1827	        3657    	if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
  1828	          85    	    PL_sublex_info.sub_op->op_private |=
  1829					    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
  1830				}
  1831			    }
  1832			
  1833			    /* shrink the sv if we allocated more than we used */
  1834	     1154587        if (SvCUR(sv) + 5 < SvLEN(sv)) {
  1835	      641927    	SvPV_shrink_to_cur(sv);
  1836			    }
  1837			
  1838			    /* return the substring (via yylval) only if we parsed anything */
  1839	     1154587        if (s > PL_bufptr) {
  1840	     1022593    	if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
  1841	          13    	    sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
  1842						      sv, Nullsv,
  1843						      ( PL_lex_inwhat == OP_TRANS
  1844							? "tr"
  1845							: ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
  1846							    ? "s"
  1847							    : "qq")));
  1848	     1022593    	yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
  1849			    } else
  1850	      131994    	SvREFCNT_dec(sv);
  1851	     1154587        return s;
  1852			}
  1853			
  1854			/* S_intuit_more
  1855			 * Returns TRUE if there's more to the expression (e.g., a subscript),
  1856			 * FALSE otherwise.
  1857			 *
  1858			 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
  1859			 *
  1860			 * ->[ and ->{ return TRUE
  1861			 * { and [ outside a pattern are always subscripts, so return TRUE
  1862			 * if we're outside a pattern and it's not { or [, then return FALSE
  1863			 * if we're in a pattern and the first char is a {
  1864			 *   {4,5} (any digits around the comma) returns FALSE
  1865			 * if we're in a pattern and the first char is a [
  1866			 *   [] returns FALSE
  1867			 *   [SOMETHING] has a funky algorithm to decide whether it's a
  1868			 *      character class or not.  It has to deal with things like
  1869			 *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
  1870			 * anything else returns TRUE
  1871			 */
  1872			
  1873			/* This is the one truly awful dwimmer necessary to conflate C and sed. */
  1874			
  1875			STATIC int
  1876			S_intuit_more(pTHX_ register char *s)
  1877	     5339601    {
  1878	     5339601        if (PL_lex_brackets)
  1879	     4128734    	return TRUE;
  1880	     1210867        if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
  1881	       25100    	return TRUE;
  1882	     1185767        if (*s != '{' && *s != '[')
  1883	     1118904    	return FALSE;
  1884	       66863        if (!PL_lex_inpat)
  1885	       65371    	return TRUE;
  1886			
  1887			    /* In a pattern, so maybe we have {n,m}. */
  1888	        1492        if (*s == '{') {
  1889	        1144    	s++;
  1890	        1144    	if (!isDIGIT(*s))
  1891	        1144    	    return TRUE;
  1892	      ######    	while (isDIGIT(*s))
  1893	      ######    	    s++;
  1894	      ######    	if (*s == ',')
  1895	      ######    	    s++;
  1896	      ######    	while (isDIGIT(*s))
  1897	      ######    	    s++;
  1898	      ######    	if (*s == '}')
  1899	      ######    	    return FALSE;
  1900	      ######    	return TRUE;
  1901				
  1902			    }
  1903			
  1904			    /* On the other hand, maybe we have a character class */
  1905			
  1906	         348        s++;
  1907	         348        if (*s == ']' || *s == '^')
  1908	          32    	return FALSE;
  1909			    else {
  1910			        /* this is terrifying, and it works */
  1911	         316    	int weight = 2;		/* let's weigh the evidence */
  1912	         316    	char seen[256];
  1913	         316    	unsigned char un_char = 255, last_un_char;
  1914	         316    	const char * const send = strchr(s,']');
  1915	         316    	char tmpbuf[sizeof PL_tokenbuf * 4];
  1916			
  1917	         316    	if (!send)		/* has to be an expression */
  1918	      ######    	    return TRUE;
  1919			
  1920	         316    	Zero(seen,256,char);
  1921	         316    	if (*s == '$')
  1922	           4    	    weight -= 3;
  1923	         312    	else if (isDIGIT(*s)) {
  1924	         304    	    if (s[1] != ']') {
  1925	      ######    		if (isDIGIT(s[1]) && s[2] == ']')
  1926	      ######    		    weight -= 10;
  1927				    }
  1928				    else
  1929	         304    		weight -= 100;
  1930				}
  1931	         984    	for (; s < send; s++) {
  1932	         334    	    last_un_char = un_char;
  1933	         334    	    un_char = (unsigned char)*s;
  1934	         334    	    switch (*s) {
  1935				    case '@':
  1936				    case '&':
  1937				    case '$':
  1938	           4    		weight -= seen[un_char] * 10;
  1939	           4    		if (isALNUM_lazy_if(s+1,UTF)) {
  1940	           4    		    scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
  1941	           4    		    if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
  1942	      ######    			weight -= 100;
  1943					    else
  1944	           4    			weight -= 10;
  1945					}
  1946	      ######    		else if (*s == '$' && s[1] &&
  1947					  strchr("[#!%*<>()-=",s[1])) {
  1948	      ######    		    if (/*{*/ strchr("])} =",s[2]))
  1949	      ######    			weight -= 10;
  1950					    else
  1951	      ######    			weight -= 1;
  1952					}
  1953	      ######    		break;
  1954				    case '\\':
  1955	           2    		un_char = 254;
  1956	           2    		if (s[1]) {
  1957	           2    		    if (strchr("wds]",s[1]))
  1958	           2    			weight += 100;
  1959	      ######    		    else if (seen['\''] || seen['"'])
  1960	      ######    			weight += 1;
  1961	      ######    		    else if (strchr("rnftbxcav",s[1]))
  1962	      ######    			weight += 40;
  1963	      ######    		    else if (isDIGIT(s[1])) {
  1964	      ######    			weight += 40;
  1965	      ######    			while (s[1] && isDIGIT(s[1]))
  1966	      ######    			    s++;
  1967					    }
  1968					}
  1969					else
  1970	      ######    		    weight += 100;
  1971	      ######    		break;
  1972				    case '-':
  1973	           6    		if (s[1] == '\\')
  1974	      ######    		    weight += 50;
  1975	           6    		if (strchr("aA01! ",last_un_char))
  1976	           2    		    weight += 30;
  1977	           6    		if (strchr("zZ79~",s[1]))
  1978	           2    		    weight += 30;
  1979	           6    		if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
  1980	           4    		    weight -= 5;	/* cope with negative subscript */
  1981	           4    		break;
  1982				    default:
  1983	         322    		if (!isALNUM(last_un_char)
  1984					    && !(last_un_char == '$' || last_un_char == '@'
  1985						 || last_un_char == '&')
  1986					    && isALPHA(*s) && s[1] && isALPHA(s[1])) {
  1987	      ######    		    char *d = tmpbuf;
  1988	      ######    		    while (isALPHA(*s))
  1989	      ######    			*d++ = *s++;
  1990	      ######    		    *d = '\0';
  1991	      ######    		    if (keyword(tmpbuf, d - tmpbuf))
  1992	      ######    			weight -= 150;
  1993					}
  1994	         322    		if (un_char == last_un_char + 1)
  1995	      ######    		    weight += 5;
  1996	         322    		weight -= seen[un_char];
  1997	         334    		break;
  1998				    }
  1999	         334    	    seen[un_char]++;
  2000				}
  2001	         316    	if (weight >= 0)	/* probably a character class */
  2002	           6    	    return FALSE;
  2003			    }
  2004			
  2005	         310        return TRUE;
  2006			}
  2007			
  2008			/*
  2009			 * S_intuit_method
  2010			 *
  2011			 * Does all the checking to disambiguate
  2012			 *   foo bar
  2013			 * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
  2014			 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
  2015			 *
  2016			 * First argument is the stuff after the first token, e.g. "bar".
  2017			 *
  2018			 * Not a method if bar is a filehandle.
  2019			 * Not a method if foo is a subroutine prototyped to take a filehandle.
  2020			 * Not a method if it's really "Foo $bar"
  2021			 * Method if it's "foo $bar"
  2022			 * Not a method if it's really "print foo $bar"
  2023			 * Method if it's really "foo package::" (interpreted as package->foo)
  2024			 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
  2025			 * Not a method if bar is a filehandle or package, but is quoted with
  2026			 *   =>
  2027			 */
  2028			
  2029			STATIC int
  2030			S_intuit_method(pTHX_ char *start, GV *gv)
  2031	       24016    {
  2032	       24016        char *s = start + (*start == '$');
  2033	       24016        char tmpbuf[sizeof PL_tokenbuf];
  2034	       24016        STRLEN len;
  2035	       24016        GV* indirgv;
  2036			
  2037	       24016        if (gv) {
  2038	       19523    	CV *cv;
  2039	       19523    	if (GvIO(gv))
  2040	        2526    	    return 0;
  2041	       16997    	if ((cv = GvCVu(gv))) {
  2042	       16798    	    const char *proto = SvPVX_const(cv);
  2043	       16798    	    if (proto) {
  2044	        2377    		if (*proto == ';')
  2045	           6    		    proto++;
  2046	        2377    		if (*proto == '*')
  2047	           5    		    return 0;
  2048				    }
  2049				} else
  2050	         199    	    gv = 0;
  2051			    }
  2052	       21485        s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
  2053			    /* start is the beginning of the possible filehandle/object,
  2054			     * and s is the end of it
  2055			     * tmpbuf is a copy of it
  2056			     */
  2057			
  2058	       21485        if (*start == '$') {
  2059	       10318    	if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
  2060	       10318    	    return 0;
  2061	      ######    	s = skipspace(s);
  2062	      ######    	PL_bufptr = start;
  2063	      ######    	PL_expect = XREF;
  2064	      ######    	return *s == '(' ? FUNCMETH : METHOD;
  2065			    }
  2066	       11167        if (!keyword(tmpbuf, len)) {
  2067	        7977    	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
  2068	           2    	    len -= 2;
  2069	           2    	    tmpbuf[len] = '\0';
  2070	           2    	    goto bare_package;
  2071				}
  2072	        7975    	indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
  2073	        7975    	if (indirgv && GvCVu(indirgv))
  2074	         251    	    return 0;
  2075				/* filehandle or package name makes it a method */
  2076	        7724    	if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
  2077	        5388    	    s = skipspace(s);
  2078	        5388    	    if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
  2079	      ######    		return 0;	/* no assumptions -- "=>" quotes bearword */
  2080			      bare_package:
  2081	        5390    	    PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
  2082									   newSVpvn(tmpbuf,len));
  2083	        5390    	    PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
  2084	        5390    	    PL_expect = XTERM;
  2085	        5390    	    force_next(WORD);
  2086	        5390    	    PL_bufptr = s;
  2087	        5390    	    return *s == '(' ? FUNCMETH : METHOD;
  2088				}
  2089			    }
  2090	        5526        return 0;
  2091			}
  2092			
  2093			/*
  2094			 * S_incl_perldb
  2095			 * Return a string of Perl code to load the debugger.  If PERL5DB
  2096			 * is set, it will return the contents of that, otherwise a
  2097			 * compile-time require of perl5db.pl.
  2098			 */
  2099			
  2100			STATIC const char*
  2101			S_incl_perldb(pTHX)
  2102	        4506    {
  2103	        4506        if (PL_perldb) {
  2104	           9    	const char * const pdb = PerlEnv_getenv("PERL5DB");
  2105			
  2106	           9    	if (pdb)
  2107	           9    	    return pdb;
  2108	      ######    	SETERRNO(0,SS_NORMAL);
  2109	      ######    	return "BEGIN { require 'perl5db.pl' }";
  2110			    }
  2111	        4497        return "";
  2112			}
  2113			
  2114			
  2115			/* Encoded script support. filter_add() effectively inserts a
  2116			 * 'pre-processing' function into the current source input stream.
  2117			 * Note that the filter function only applies to the current source file
  2118			 * (e.g., it will not affect files 'require'd or 'use'd by this one).
  2119			 *
  2120			 * The datasv parameter (which may be NULL) can be used to pass
  2121			 * private data to this instance of the filter. The filter function
  2122			 * can recover the SV using the FILTER_DATA macro and use it to
  2123			 * store private buffers and state information.
  2124			 *
  2125			 * The supplied datasv parameter is upgraded to a PVIO type
  2126			 * and the IoDIRP/IoANY field is used to store the function pointer,
  2127			 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
  2128			 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
  2129			 * private use must be set using malloc'd pointers.
  2130			 */
  2131			
  2132			SV *
  2133			Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
  2134	        2094    {
  2135	        2094        if (!funcp)
  2136	      ######    	return Nullsv;
  2137			
  2138	        2094        if (!PL_rsfp_filters)
  2139	        2090    	PL_rsfp_filters = newAV();
  2140	        2094        if (!datasv)
  2141	        2048    	datasv = NEWSV(255,0);
  2142	        2094        SvUPGRADE(datasv, SVt_PVIO);
  2143	        2094        IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
  2144	        2094        IoFLAGS(datasv) |= IOf_FAKE_DIRP;
  2145			    DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
  2146	        2094    			  IoANY(datasv), SvPV_nolen(datasv)));
  2147	        2094        av_unshift(PL_rsfp_filters, 1);
  2148	        2094        av_store(PL_rsfp_filters, 0, datasv) ;
  2149	        2094        return(datasv);
  2150			}
  2151			
  2152			
  2153			/* Delete most recently added instance of this filter function.	*/
  2154			void
  2155			Perl_filter_del(pTHX_ filter_t funcp)
  2156	        2073    {
  2157	        2073        SV *datasv;
  2158			
  2159			#ifdef DEBUGGING
  2160	        2073        DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
  2161			#endif
  2162	        2073        if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
  2163	        2073    	return;
  2164			    /* if filter is on top of stack (usual case) just pop it off */
  2165	        2073        datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
  2166	        2073        if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
  2167	        2073    	IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
  2168	        2073    	IoANY(datasv) = (void *)NULL;
  2169	        2073    	sv_free(av_pop(PL_rsfp_filters));
  2170			
  2171	        2073            return;
  2172			    }
  2173			    /* we need to search for the correct entry and clear it	*/
  2174	      ######        Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
  2175			}
  2176			
  2177			
  2178			/* Invoke the idxth filter function for the current rsfp.	 */
  2179			/* maxlen 0 = read one text line */
  2180			I32
  2181			Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
  2182	        5145    {
  2183	        5145        filter_t funcp;
  2184	        5145        SV *datasv = NULL;
  2185			
  2186	        5145        if (!PL_rsfp_filters)
  2187	      ######    	return -1;
  2188	        5145        if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?	*/
  2189				/* Provide a default input filter to make life easy.	*/
  2190				/* Note that we append to the line. This is handy.	*/
  2191				DEBUG_P(PerlIO_printf(Perl_debug_log,
  2192	         229    			      "filter_read %d: from rsfp\n", idx));
  2193	         229    	if (maxlen) {
  2194			 	    /* Want a block */
  2195	          39    	    int len ;
  2196	          39    	    const int old_len = SvCUR(buf_sv);
  2197			
  2198				    /* ensure buf_sv is large enough */
  2199	          39    	    SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
  2200	          39    	    if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
  2201	           6    		if (PerlIO_error(PL_rsfp))
  2202	      ######    	            return -1;		/* error */
  2203				        else
  2204	           6    		    return 0 ;		/* end of file */
  2205				    }
  2206	          33    	    SvCUR_set(buf_sv, old_len + len) ;
  2207				} else {
  2208				    /* Want a line */
  2209	         190                if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
  2210	          31    		if (PerlIO_error(PL_rsfp))
  2211	      ######    	            return -1;		/* error */
  2212				        else
  2213	          31    		    return 0 ;		/* end of file */
  2214				    }
  2215				}
  2216	         192    	return SvCUR(buf_sv);
  2217			    }
  2218			    /* Skip this filter slot if filter has been deleted	*/
  2219	        4916        if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
  2220				DEBUG_P(PerlIO_printf(Perl_debug_log,
  2221						      "filter_read %d: skipped (filter deleted)\n",
  2222	      ######    			      idx));
  2223	      ######    	return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
  2224			    }
  2225			    /* Get function pointer hidden within datasv	*/
  2226	        4916        funcp = DPTR2FPTR(filter_t, IoANY(datasv));
  2227			    DEBUG_P(PerlIO_printf(Perl_debug_log,
  2228						  "filter_read %d: via function %p (%s)\n",
  2229	        4916    			  idx, datasv, SvPV_nolen_const(datasv)));
  2230			    /* Call function. The function is expected to 	*/
  2231			    /* call "FILTER_READ(idx+1, buf_sv)" first.		*/
  2232			    /* Return: <0:error, =0:eof, >0:not eof 		*/
  2233	        4916        return (*funcp)(aTHX_ idx, buf_sv, maxlen);
  2234			}
  2235			
  2236			STATIC char *
  2237			S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
  2238	    10229110    {
  2239			#ifdef PERL_CR_FILTER
  2240			    if (!PL_rsfp_filters) {
  2241				filter_add(S_cr_textfilter,NULL);
  2242			    }
  2243			#endif
  2244	    10229110        if (PL_rsfp_filters) {
  2245	        4891    	if (!append)
  2246	        3169                SvCUR_set(sv, 0);	/* start with empty line	*/
  2247	        4891            if (FILTER_READ(0, sv, 0) > 0)
  2248	        2806                return ( SvPVX(sv) ) ;
  2249			        else
  2250	        2084    	    return Nullch ;
  2251			    }
  2252			    else
  2253	    10224219            return (sv_gets(sv, fp, append));
  2254			}
  2255			
  2256			STATIC HV *
  2257			S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
  2258	          30    {
  2259	          30        GV *gv;
  2260			
  2261	          30        if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
  2262	           1            return PL_curstash;
  2263			
  2264	          29        if (len > 2 &&
  2265			        (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
  2266			        (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
  2267			    {
  2268	           1            return GvHV(gv);			/* Foo:: */
  2269			    }
  2270			
  2271			    /* use constant CLASS => 'MyClass' */
  2272	          28        if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
  2273	           2            SV *sv;
  2274	           2            if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
  2275	           2                pkgname = SvPV_nolen_const(sv);
  2276			        }
  2277			    }
  2278			
  2279	          28        return gv_stashpv(pkgname, FALSE);
  2280			}
  2281			
  2282			#ifdef DEBUGGING
  2283			    static const char* const exp_name[] =
  2284				{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
  2285				  "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
  2286				};
  2287			#endif
  2288			
  2289			/*
  2290			  yylex
  2291			
  2292			  Works out what to call the token just pulled out of the input
  2293			  stream.  The yacc parser takes care of taking the ops we return and
  2294			  stitching them into a tree.
  2295			
  2296			  Returns:
  2297			    PRIVATEREF
  2298			
  2299			  Structure:
  2300			      if read an identifier
  2301			          if we're in a my declaration
  2302				      croak if they tried to say my($foo::bar)
  2303				      build the ops for a my() declaration
  2304				  if it's an access to a my() variable
  2305				      are we in a sort block?
  2306				          croak if my($a); $a <=> $b
  2307				      build ops for access to a my() variable
  2308				  if in a dq string, and they've said @foo and we can't find @foo
  2309				      croak
  2310				  build ops for a bareword
  2311			      if we already built the token before, use it.
  2312			*/
  2313			
  2314			
  2315			#ifdef __SC__
  2316			#pragma segment Perl_yylex
  2317			#endif
  2318			int
  2319			Perl_yylex(pTHX)
  2320	    39968763    {
  2321	    39968763        register char *s = PL_bufptr;
  2322	    39968763        register char *d;
  2323	    39968763        register I32 tmp;
  2324	    39968763        STRLEN len;
  2325	    39968763        GV *gv = Nullgv;
  2326	    39968763        GV **gvp = 0;
  2327	    39968763        bool bof = FALSE;
  2328	    39968763        I32 orig_keyword = 0;
  2329			
  2330			    DEBUG_T( {
  2331				PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
  2332								lex_state_names[PL_lex_state]);
  2333	    39968763        } );
  2334			    /* check if there's an identifier for us to look at */
  2335	    39968763        if (PL_pending_ident)
  2336	     5127241            return REPORT(S_pending_ident(aTHX));
  2337			
  2338			    /* no identifier pending identification */
  2339			
  2340	    34841522        switch (PL_lex_state) {
  2341			#ifdef COMMENTARY
  2342			    case LEX_NORMAL:		/* Some compilers will produce faster */
  2343			    case LEX_INTERPNORMAL:	/* code if we comment these out. */
  2344				break;
  2345			#endif
  2346			
  2347			    /* when we've already built the next token, just pull it out of the queue */
  2348			    case LEX_KNOWNEXT:
  2349	     4267092    	PL_nexttoke--;
  2350	     4267092    	yylval = PL_nextval[PL_nexttoke];
  2351	     4267092    	if (!PL_nexttoke) {
  2352	     4056398    	    PL_lex_state = PL_lex_defer;
  2353	     4056398    	    PL_expect = PL_lex_expect;
  2354	     4056398    	    PL_lex_defer = LEX_NORMAL;
  2355				}
  2356				DEBUG_T({ PerlIO_printf(Perl_debug_log,
  2357			              "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
  2358	     4267092                  (IV)PL_nexttype[PL_nexttoke]); });
  2359			
  2360	     4267092    	return REPORT(PL_nexttype[PL_nexttoke]);
  2361			
  2362			    /* interpolated case modifiers like \L \U, including \Q and \E.
  2363			       when we get here, PL_bufptr is at the \
  2364			    */
  2365			    case LEX_INTERPCASEMOD:
  2366			#ifdef DEBUGGING
  2367	        7413    	if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
  2368	      ######    	    Perl_croak(aTHX_ "panic: INTERPCASEMOD");
  2369			#endif
  2370				/* handle \E or end of string */
  2371	        7413           	if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
  2372				    /* if at a \E */
  2373	        3993    	    if (PL_lex_casemods) {
  2374	        3411    		const char oldmod = PL_lex_casestack[--PL_lex_casemods];
  2375	        3411    		PL_lex_casestack[PL_lex_casemods] = '\0';
  2376			
  2377	        3411    		if (PL_bufptr != PL_bufend
  2378					    && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
  2379	        2756    		    PL_bufptr += 2;
  2380	        2756    		    PL_lex_state = LEX_INTERPCONCAT;
  2381					}
  2382	        3411    		return REPORT(')');
  2383				    }
  2384	         582    	    if (PL_bufptr != PL_bufend)
  2385	           6    		PL_bufptr += 2;
  2386	         582    	    PL_lex_state = LEX_INTERPCONCAT;
  2387	         582    	    return yylex();
  2388				}
  2389				else {
  2390				    DEBUG_T({ PerlIO_printf(Perl_debug_log,
  2391	        3420                  "### Saw case modifier at '%s'\n", PL_bufptr); });
  2392	        3420    	    s = PL_bufptr + 1;
  2393	        3420    	    if (s[1] == '\\' && s[2] == 'E') {
  2394	           5    	        PL_bufptr = s + 3;
  2395	           5    		PL_lex_state = LEX_INTERPCONCAT;
  2396	           5    		return yylex();
  2397				    }
  2398				    else {
  2399	        3415    	        if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
  2400	           5    		    tmp = *s, *s = s[2], s[2] = (char)tmp;	/* misordered... */
  2401	        3415    		if ((*s == 'L' || *s == 'U') &&
  2402					    (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
  2403	           2    		    PL_lex_casestack[--PL_lex_casemods] = '\0';
  2404	           2    		    return REPORT(')');
  2405					}
  2406	        3413    		if (PL_lex_casemods > 10)
  2407	           2    		    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
  2408	        3413    		PL_lex_casestack[PL_lex_casemods++] = *s;
  2409	        3413    		PL_lex_casestack[PL_lex_casemods] = '\0';
  2410	        3413    		PL_lex_state = LEX_INTERPCONCAT;
  2411	        3413    		PL_nextval[PL_nexttoke].ival = 0;
  2412	        3413    		force_next('(');
  2413	        3413    		if (*s == 'l')
  2414	          15    		    PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
  2415	        3398    		else if (*s == 'u')
  2416	          82    		    PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
  2417	        3316    		else if (*s == 'L')
  2418	         273    		    PL_nextval[PL_nexttoke].ival = OP_LC;
  2419	        3043    		else if (*s == 'U')
  2420	         232    		    PL_nextval[PL_nexttoke].ival = OP_UC;
  2421	        2811    		else if (*s == 'Q')
  2422	        2811    		    PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
  2423					else
  2424	      ######    		    Perl_croak(aTHX_ "panic: yylex");
  2425	        3413    		PL_bufptr = s + 1;
  2426				    }
  2427	        3413    	    force_next(FUNC);
  2428	        3413    	    if (PL_lex_starts) {
  2429	        1565    		s = PL_bufptr;
  2430	        1565    		PL_lex_starts = 0;
  2431					/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
  2432	        1565    		if (PL_lex_casemods == 1 && PL_lex_inpat)
  2433	        1157    		    OPERATOR(',');
  2434					else
  2435	         408    		    Aop(OP_CONCAT);
  2436				    }
  2437				    else
  2438	        1848    		return yylex();
  2439				}
  2440			
  2441			    case LEX_INTERPPUSH:
  2442	      796041            return REPORT(sublex_push());
  2443			
  2444			    case LEX_INTERPSTART:
  2445	     1148407    	if (PL_bufptr == PL_bufend)
  2446	      711138    	    return REPORT(sublex_done());
  2447				DEBUG_T({ PerlIO_printf(Perl_debug_log,
  2448	      437269                  "### Interpolated variable at '%s'\n", PL_bufptr); });
  2449	      437269    	PL_expect = XTERM;
  2450	      437269    	PL_lex_dojoin = (*PL_bufptr == '@');
  2451	      437269    	PL_lex_state = LEX_INTERPNORMAL;
  2452	      437269    	if (PL_lex_dojoin) {
  2453	       12824    	    PL_nextval[PL_nexttoke].ival = 0;
  2454	       12824    	    force_next(',');
  2455	       12824    	    force_ident("\"", '$');
  2456	       12824    	    PL_nextval[PL_nexttoke].ival = 0;
  2457	       12824    	    force_next('$');
  2458	       12824    	    PL_nextval[PL_nexttoke].ival = 0;
  2459	       12824    	    force_next('(');
  2460	       12824    	    PL_nextval[PL_nexttoke].ival = OP_JOIN;	/* emulate join($", ...) */
  2461	       12824    	    force_next(FUNC);
  2462				}
  2463	      437269    	if (PL_lex_starts++) {
  2464	      324436    	    s = PL_bufptr;
  2465				    /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
  2466	      324436    	    if (!PL_lex_casemods && PL_lex_inpat)
  2467	       12811    		OPERATOR(',');
  2468				    else
  2469	      311625    		Aop(OP_CONCAT);
  2470				}
  2471	      112833    	return yylex();
  2472			
  2473			    case LEX_INTERPENDMAYBE:
  2474	      393225    	if (intuit_more(PL_bufptr)) {
  2475	       46561    	    PL_lex_state = LEX_INTERPNORMAL;	/* false alarm, more expr */
  2476	       46561    	    break;
  2477				}
  2478				/* FALL THROUGH */
  2479			
  2480			    case LEX_INTERPEND:
  2481	      441730    	if (PL_lex_dojoin) {
  2482	       12824    	    PL_lex_dojoin = FALSE;
  2483	       12824    	    PL_lex_state = LEX_INTERPCONCAT;
  2484	       12824    	    return REPORT(')');
  2485				}
  2486	      428906    	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
  2487				    && SvEVALED(PL_lex_repl))
  2488				{
  2489	        4385    	    if (PL_bufptr != PL_bufend)
  2490	           2    		Perl_croak(aTHX_ "Bad evalled substitution pattern");
  2491	        4383    	    PL_lex_repl = Nullsv;
  2492				}
  2493				/* FALLTHROUGH */
  2494			    case LEX_INTERPCONCAT:
  2495			#ifdef DEBUGGING
  2496	     1369138    	if (PL_lex_brackets)
  2497	      ######    	    Perl_croak(aTHX_ "panic: INTERPCONCAT");
  2498			#endif
  2499	     1369138    	if (PL_bufptr == PL_bufend)
  2500	      205178    	    return REPORT(sublex_done());
  2501			
  2502	     1163960    	if (SvIVX(PL_linestr) == '\'') {
  2503	        9369    	    SV *sv = newSVsv(PL_linestr);
  2504	        9369    	    if (!PL_lex_inpat)
  2505	           5    		sv = tokeq(sv);
  2506	        9364    	    else if ( PL_hints & HINT_NEW_RE )
  2507	           2    		sv = new_constant(NULL, 0, "qr", sv, sv, "q");
  2508	        9369    	    yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
  2509	        9369    	    s = PL_bufend;
  2510				}
  2511				else {
  2512	     1154591    	    s = scan_const(PL_bufptr);
  2513	     1154587    	    if (*s == '\\')
  2514	        6180    		PL_lex_state = LEX_INTERPCASEMOD;
  2515				    else
  2516	     1148407    		PL_lex_state = LEX_INTERPSTART;
  2517				}
  2518			
  2519	     1163956    	if (s != PL_bufptr) {
  2520	     1031962    	    PL_nextval[PL_nexttoke] = yylval;
  2521	     1031962    	    PL_expect = XTERM;
  2522	     1031962    	    force_next(THING);
  2523	     1031962    	    if (PL_lex_starts++) {
  2524					/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
  2525	      304101    		if (!PL_lex_casemods && PL_lex_inpat)
  2526	       13060    		    OPERATOR(',');
  2527					else
  2528	      291041    		    Aop(OP_CONCAT);
  2529				    }
  2530				    else {
  2531	      727861    		PL_bufptr = s;
  2532	      727861    		return yylex();
  2533				    }
  2534				}
  2535			
  2536	      131994    	return yylex();
  2537			    case LEX_FORMLINE:
  2538	         265    	PL_lex_state = LEX_NORMAL;
  2539	         265    	s = scan_formline(PL_bufptr);
  2540	         265    	if (!PL_lex_formbrack)
  2541	          88    	    goto rightbracket;
  2542	         177    	OPERATOR(';');
  2543			    }
  2544			
  2545	    27240340        s = PL_bufptr;
  2546	    27240340        PL_oldoldbufptr = PL_oldbufptr;
  2547	    27240340        PL_oldbufptr = s;
  2548			    DEBUG_T( {
  2549				PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
  2550					      exp_name[PL_expect], s);
  2551	    27240340        } );
  2552			
  2553			  retry:
  2554	    66447326        switch (*s) {
  2555			    default:
  2556	           2    	if (isIDFIRST_lazy_if(s,UTF))
  2557	           2    	    goto keylookup;
  2558	      ######    	Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
  2559			    case 4:
  2560			    case 26:
  2561	     6653408    	goto fake_eof;			/* emulate EOF on ^D or ^Z */
  2562			    case 0:
  2563	     6653408    	if (!PL_rsfp) {
  2564	      105470    	    PL_last_uni = 0;
  2565	      105470    	    PL_last_lop = 0;
  2566	      105470    	    if (PL_lex_brackets) {
  2567	           2     	        if (PL_lex_formbrack)
  2568	           1    		    yyerror("Format not terminated");
  2569			                else
  2570	           1    		    yyerror("Missing right curly or square bracket");
  2571				    }
  2572			            DEBUG_T( { PerlIO_printf(Perl_debug_log,
  2573			                        "### Tokener got EOF\n");
  2574	      105470                } );
  2575	      105470    	    TOKEN(0);
  2576				}
  2577	     6547938    	if (s++ < PL_bufend)
  2578	           7    	    goto retry;			/* ignore stray nulls */
  2579	     6547931    	PL_last_uni = 0;
  2580	     6547931    	PL_last_lop = 0;
  2581	     6547931    	if (!PL_in_eval && !PL_preambled) {
  2582	        4506    	    PL_preambled = TRUE;
  2583	        4506    	    sv_setpv(PL_linestr,incl_perldb());
  2584	        4506    	    if (SvCUR(PL_linestr))
  2585	           9    		sv_catpvn(PL_linestr,";", 1);
  2586	        4506    	    if (PL_preambleav){
  2587	        1414    		while(AvFILLp(PL_preambleav) >= 0) {
  2588	         711    		    SV *tmpsv = av_shift(PL_preambleav);
  2589	         711    		    sv_catsv(PL_linestr, tmpsv);
  2590	         711    		    sv_catpvn(PL_linestr, ";", 1);
  2591	         711    		    sv_free(tmpsv);
  2592					}
  2593	         703    		sv_free((SV*)PL_preambleav);
  2594	         703    		PL_preambleav = NULL;
  2595				    }
  2596	        4506    	    if (PL_minus_n || PL_minus_p) {
  2597	          21    		sv_catpv(PL_linestr, "LINE: while (<>) {");
  2598	          21    		if (PL_minus_l)
  2599	           3    		    sv_catpv(PL_linestr,"chomp;");
  2600	          21    		if (PL_minus_a) {
  2601	           5    		    if (PL_minus_F) {
  2602	           2    			if ((*PL_splitstr == '/' || *PL_splitstr == '\''
  2603						     || *PL_splitstr == '"')
  2604						      && strchr(PL_splitstr + 1, *PL_splitstr))
  2605	      ######    			    Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
  2606						else {
  2607						    /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
  2608						       bytes can be used as quoting characters.  :-) */
  2609						    /* The count here deliberately includes the NUL
  2610						       that terminates the C string constant.  This
  2611						       embeds the opening NUL into the string.  */
  2612	           2    			    const char *splits = PL_splitstr;
  2613	           2    			    sv_catpvn(PL_linestr, "our @F=split(q", 15);
  2614	          15    			    do {
  2615							/* Need to \ \s  */
  2616	          15    				if (*splits == '\\')
  2617	           2    				    sv_catpvn(PL_linestr, splits, 1);
  2618	          15    				sv_catpvn(PL_linestr, splits, 1);
  2619	          15    			    } while (*splits++);
  2620						    /* This loop will embed the trailing NUL of
  2621						       PL_linestr as the last thing it does before
  2622						       terminating.  */
  2623	           2    			    sv_catpvn(PL_linestr, ");", 2);
  2624						}
  2625					    }
  2626					    else
  2627	           3    		        sv_catpv(PL_linestr,"our @F=split(' ');");
  2628					}
  2629				    }
  2630	        4506    	    sv_catpvn(PL_linestr, "\n", 1);
  2631	        4506    	    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
  2632	        4506    	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2633	        4506    	    PL_last_lop = PL_last_uni = Nullch;
  2634	        4506    	    if (PERLDB_LINE && PL_curstash != PL_debstash) {
  2635	           9    		SV * const sv = NEWSV(85,0);
  2636			
  2637	           9    		sv_upgrade(sv, SVt_PVMG);
  2638	           9    		sv_setsv(sv,PL_linestr);
  2639	           9                    (void)SvIOK_on(sv);
  2640	           9                    SvIV_set(sv, 0);
  2641	           9    		av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
  2642				    }
  2643	           9    	    goto retry;
  2644				}
  2645	     8418620    	do {
  2646	     8418620    	    bof = PL_rsfp ? TRUE : FALSE;
  2647	     8418620    	    if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
  2648				      fake_eof:
  2649	       38009    		if (PL_rsfp) {
  2650	       37719    		    if (PL_preprocess && !PL_in_eval)
  2651	           3    			(void)PerlProc_pclose(PL_rsfp);
  2652	       37716    		    else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
  2653	          19    			PerlIO_clearerr(PL_rsfp);
  2654					    else
  2655	       37697    			(void)PerlIO_close(PL_rsfp);
  2656	       37719    		    PL_rsfp = Nullfp;
  2657	       37719    		    PL_doextract = FALSE;
  2658					}
  2659	       38009    		if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
  2660	          16    		    sv_setpv(PL_linestr,PL_minus_p
  2661						     ? ";}continue{print;}" : ";}");
  2662	          16    		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
  2663	          16    		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2664	          16    		    PL_last_lop = PL_last_uni = Nullch;
  2665	          16    		    PL_minus_n = PL_minus_p = 0;
  2666	          16    		    goto retry;
  2667					}
  2668	       37993    		PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
  2669	       37993    		PL_last_lop = PL_last_uni = Nullch;
  2670	       37993    		sv_setpvn(PL_linestr,"",0);
  2671	       37993    		TOKEN(';');	/* not infinite loop because rsfp is NULL now */
  2672				    }
  2673				    /* If it looks like the start of a BOM or raw UTF-16,
  2674				     * check if it in fact is. */
  2675	     8396125    	    else if (bof &&
  2676					     (*s == 0 ||
  2677					      *(U8*)s == 0xEF ||
  2678					      *(U8*)s >= 0xFE ||
  2679					      s[1] == 0)) {
  2680			#ifdef PERLIO_IS_STDIO
  2681			#  ifdef __GNU_LIBRARY__
  2682			#    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
  2683			#      define FTELL_FOR_PIPE_IS_BROKEN
  2684			#    endif
  2685			#  else
  2686			#    ifdef __GLIBC__
  2687			#      if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
  2688			#        define FTELL_FOR_PIPE_IS_BROKEN
  2689			#      endif
  2690			#    endif
  2691			#  endif
  2692			#endif
  2693			#ifdef FTELL_FOR_PIPE_IS_BROKEN
  2694					/* This loses the possibility to detect the bof
  2695					 * situation on perl -P when the libc5 is being used.
  2696					 * Workaround?  Maybe attach some extra state to PL_rsfp?
  2697					 */
  2698					if (!PL_preprocess)
  2699					    bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
  2700			#else
  2701	     1751063    		bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
  2702			#endif
  2703	     1751063    		if (bof) {
  2704	        1685    		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2705	        1685    		    s = swallow_bom((U8*)s);
  2706					}
  2707				    }
  2708	     8396125    	    if (PL_doextract) {
  2709					/* Incest with pod. */
  2710	     1944147    		if (*s == '=' && strnEQ(s, "=cut", 4)) {
  2711	       68952    		    sv_setpvn(PL_linestr, "", 0);
  2712	       68952    		    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
  2713	       68952    		    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2714	       68952    		    PL_last_lop = PL_last_uni = Nullch;
  2715	       68952    		    PL_doextract = FALSE;
  2716					}
  2717				    }
  2718	     8396125    	    incline(s);
  2719	     8396125    	} while (PL_doextract);
  2720	     6520930    	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
  2721	     6520930    	if (PERLDB_LINE && PL_curstash != PL_debstash) {
  2722	         937    	    SV * const sv = NEWSV(85,0);
  2723			
  2724	         937    	    sv_upgrade(sv, SVt_PVMG);
  2725	         937    	    sv_setsv(sv,PL_linestr);
  2726	         937                (void)SvIOK_on(sv);
  2727	         937                SvIV_set(sv, 0);
  2728	         937    	    av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
  2729				}
  2730	     6520930    	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2731	     6520930    	PL_last_lop = PL_last_uni = Nullch;
  2732	     6520930    	if (CopLINE(PL_curcop) == 1) {
  2733	       42348    	    while (s < PL_bufend && isSPACE(*s))
  2734	        1925    		s++;
  2735	       40423    	    if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
  2736	      ######    		s++;
  2737	       40423    	    d = Nullch;
  2738	       40423    	    if (!PL_in_eval) {
  2739	        5049    		if (*s == '#' && *(s+1) == '!')
  2740	        1048    		    d = s + 2;
  2741			#ifdef ALTERNATE_SHEBANG
  2742					else {
  2743					    static char const as[] = ALTERNATE_SHEBANG;
  2744					    if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
  2745						d = s + (sizeof(as) - 1);
  2746					}
  2747			#endif /* ALTERNATE_SHEBANG */
  2748				    }
  2749	       40423    	    if (d) {
  2750	        1073    		char *ipath;
  2751	        1073    		char *ipathend;
  2752			
  2753	        1073    		while (isSPACE(*d))
  2754	          25    		    d++;
  2755	        1048    		ipath = d;
  2756	       14461    		while (*d && !isSPACE(*d))
  2757	       13413    		    d++;
  2758	        1048    		ipathend = d;
  2759			
  2760			#ifdef ARG_ZERO_IS_SCRIPT
  2761					if (ipathend > ipath) {
  2762					    /*
  2763					     * HP-UX (at least) sets argv[0] to the script name,
  2764					     * which makes $^X incorrect.  And Digital UNIX and Linux,
  2765					     * at least, set argv[0] to the basename of the Perl
  2766					     * interpreter. So, having found "#!", we'll set it right.
  2767					     */
  2768					    SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
  2769					    assert(SvPOK(x) || SvGMAGICAL(x));
  2770					    if (sv_eq(x, CopFILESV(PL_curcop))) {
  2771						sv_setpvn(x, ipath, ipathend - ipath);
  2772						SvSETMAGIC(x);
  2773					    }
  2774					    else {
  2775						STRLEN blen;
  2776						STRLEN llen;
  2777						const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
  2778						const char * const lstart = SvPV_const(x,llen);
  2779						if (llen < blen) {
  2780						    bstart += blen - llen;
  2781						    if (strnEQ(bstart, lstart, llen) &&	bstart[-1] == '/') {
  2782							sv_setpvn(x, ipath, ipathend - ipath);
  2783							SvSETMAGIC(x);
  2784						    }
  2785						}
  2786					    }
  2787					    TAINT_NOT;	/* $^X is always tainted, but that's OK */
  2788					}
  2789			#endif /* ARG_ZERO_IS_SCRIPT */
  2790			
  2791					/*
  2792					 * Look for options.
  2793					 */
  2794	        1048    		d = instr(s,"perl -");
  2795	        1048    		if (!d) {
  2796	         684    		    d = instr(s,"perl");
  2797			#if defined(DOSISH)
  2798					    /* avoid getting into infinite loops when shebang
  2799					     * line contains "Perl" rather than "perl" */
  2800					    if (!d) {
  2801						for (d = ipathend-4; d >= ipath; --d) {
  2802						    if ((*d == 'p' || *d == 'P')
  2803							&& !ibcmp(d, "perl", 4))
  2804						    {
  2805							break;
  2806						    }
  2807						}
  2808						if (d < ipath)
  2809						    d = Nullch;
  2810					    }
  2811			#endif
  2812					}
  2813			#ifdef ALTERNATE_SHEBANG
  2814					/*
  2815					 * If the ALTERNATE_SHEBANG on this system starts with a
  2816					 * character that can be part of a Perl expression, then if
  2817					 * we see it but not "perl", we're probably looking at the
  2818					 * start of Perl code, not a request to hand off to some
  2819					 * other interpreter.  Similarly, if "perl" is there, but
  2820					 * not in the first 'word' of the line, we assume the line
  2821					 * contains the start of the Perl program.
  2822					 */
  2823					if (d && *s != '#') {
  2824					    const char *c = ipath;
  2825					    while (*c && !strchr("; \t\r\n\f\v#", *c))
  2826						c++;
  2827					    if (c < d)
  2828						d = Nullch;	/* "perl" not in first word; ignore */
  2829					    else
  2830						*s = '#';	/* Don't try to parse shebang line */
  2831					}
  2832			#endif /* ALTERNATE_SHEBANG */
  2833			#ifndef MACOS_TRADITIONAL
  2834	        1048    		if (!d &&
  2835					    *s == '#' &&
  2836					    ipathend > ipath &&
  2837					    !PL_minus_c &&
  2838					    !instr(s,"indir") &&
  2839					    instr(PL_origargv[0],"perl"))
  2840					{
  2841					    dVAR;
  2842	      ######    		    char **newargv;
  2843			
  2844	      ######    		    *ipathend = '\0';
  2845	      ######    		    s = ipathend + 1;
  2846	      ######    		    while (s < PL_bufend && isSPACE(*s))
  2847	      ######    			s++;
  2848	      ######    		    if (s < PL_bufend) {
  2849	      ######    			Newz(899,newargv,PL_origargc+3,char*);
  2850	      ######    			newargv[1] = s;
  2851	      ######    			while (s < PL_bufend && !isSPACE(*s))
  2852	      ######    			    s++;
  2853	      ######    			*s = '\0';
  2854	      ######    			Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
  2855					    }
  2856					    else
  2857	      ######    			newargv = PL_origargv;
  2858	      ######    		    newargv[0] = ipath;
  2859	      ######    		    PERL_FPU_PRE_EXEC
  2860	      ######    		    PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
  2861	      ######    		    PERL_FPU_POST_EXEC
  2862	      ######    		    Perl_croak(aTHX_ "Can't exec %s", ipath);
  2863					}
  2864			#endif
  2865	        1048    		if (d) {
  2866	        1048    		    const U32 oldpdb = PL_perldb;
  2867	        1048    		    const bool oldn = PL_minus_n;
  2868	        1048    		    const bool oldp = PL_minus_p;
  2869			
  2870	        5809    		    while (*d && !isSPACE(*d)) d++;
  2871	        1447    		    while (SPACE_OR_TAB(*d)) d++;
  2872			
  2873	        1048    		    if (*d++ == '-') {
  2874	         392    			const bool switches_done = PL_doswitches;
  2875	         818    			do {
  2876	         818    			    if (*d == 'M' || *d == 'm' || *d == 'C') {
  2877	      ######    				const char * const m = d;
  2878	      ######    				while (*d && !isSPACE(*d)) d++;
  2879	      ######    				Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
  2880							      (int)(d - m), m);
  2881						    }
  2882	         818    			    d = moreswitches(d);
  2883	         818    			} while (d);
  2884	         392    			if (PL_doswitches && !switches_done) {
  2885	           1    			    int argc = PL_origargc;
  2886	           1    			    char **argv = PL_origargv;
  2887	           2    			    do {
  2888	           2    				argc--,argv++;
  2889	           2    			    } while (argc && argv[0][0] == '-' && argv[0][1]);
  2890	           1    			    init_argv_symbols(argc,argv);
  2891						}
  2892	         392    			if ((PERLDB_LINE && !oldpdb) ||
  2893						    ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
  2894						      /* if we have already added "LINE: while (<>) {",
  2895						         we must not do it again */
  2896						{
  2897	           6    			    sv_setpvn(PL_linestr, "", 0);
  2898	           6    			    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
  2899	           6    			    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
  2900	           6    			    PL_last_lop = PL_last_uni = Nullch;
  2901	           6    			    PL_preambled = FALSE;
  2902	           6    			    if (PERLDB_LINE)
  2903	      ######    				(void)gv_fetchfile(PL_origfilename);
  2904	      ######    			    goto retry;
  2905						}
  2906	         386    			if (PL_doswitches && !switches_done) {
  2907	      ######    			    int argc = PL_origargc;
  2908	      ######    			    char **argv = PL_origargv;
  2909	      ######    			    do {
  2910	      ######    				argc--,argv++;
  2911	      ######    			    } while (argc && argv[0][0] == '-' && argv[0][1]);
  2912	      ######    			    init_argv_symbols(argc,argv);
  2913						}
  2914					    }
  2915					}
  2916				    }
  2917				}
  2918	     6520924    	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
  2919	         155    	    PL_bufptr = s;
  2920	         155    	    PL_lex_state = LEX_FORMLINE;
  2921	         155    	    return yylex();
  2922				}
  2923	    26156837    	goto retry;
  2924			    case '\r':
  2925			#ifdef PERL_STRICT_CR
  2926				Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
  2927				Perl_croak(aTHX_
  2928			      "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
  2929			#endif
  2930			    case ' ': case '\t': case '\f': case 013:
  2931			#ifdef MACOS_TRADITIONAL
  2932			    case '\312':
  2933			#endif
  2934	    26156837    	s++;
  2935	    26156837    	goto retry;
  2936			    case '#':
  2937			    case '\n':
  2938	     6455967    	if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
  2939	       47885    	    if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
  2940					/* handle eval qq[#line 1 "foo"\n ...] */
  2941	         181    		CopLINE_dec(PL_curcop);
  2942	         181    		incline(s);
  2943				    }
  2944	       47885    	    d = PL_bufend;
  2945	      268266    	    while (s < d && *s != '\n')
  2946	      220381    		s++;
  2947	       47885    	    if (s < d)
  2948	       47885    		s++;
  2949	      ######    	    else if (s > d) /* Found by Ilya: feed random input to Perl. */
  2950	      ######    	      Perl_croak(aTHX_ "panic: input overflow");
  2951	       47885    	    incline(s);
  2952	       47885    	    if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
  2953	          76    		PL_bufptr = s;
  2954	          76    		PL_lex_state = LEX_FORMLINE;
  2955	          76    		return yylex();
  2956				    }
  2957				}
  2958				else {
  2959	     6408082    	    *s = '\0';
  2960	     6408082    	    PL_bufend = s;
  2961				}
  2962	     6408082    	goto retry;
  2963			    case '-':
  2964	      596704    	if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
  2965	       35950    	    I32 ftst = 0;
  2966			
  2967	       35950    	    s++;
  2968	       35950    	    PL_bufptr = s;
  2969	       35950    	    tmp = *s++;
  2970			
  2971	       71137    	    while (s < PL_bufend && SPACE_OR_TAB(*s))
  2972	       35187    		s++;
  2973			
  2974	       35950    	    if (strnEQ(s,"=>",2)) {
  2975	      ######    		s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
  2976			                DEBUG_T( { PerlIO_printf(Perl_debug_log,
  2977			                            "### Saw unary minus before =>, forcing word '%s'\n", s);
  2978	      ######                    } );
  2979	      ######    		OPERATOR('-');		/* unary minus */
  2980				    }
  2981	       35950    	    PL_last_uni = PL_oldbufptr;
  2982	       35950    	    switch (tmp) {
  2983	        1655    	    case 'r': ftst = OP_FTEREAD;	break;
  2984	        2626    	    case 'w': ftst = OP_FTEWRITE;	break;
  2985	         922    	    case 'x': ftst = OP_FTEEXEC;	break;
  2986	           2    	    case 'o': ftst = OP_FTEOWNED;	break;
  2987	           2    	    case 'R': ftst = OP_FTRREAD;	break;
  2988	           4    	    case 'W': ftst = OP_FTRWRITE;	break;
  2989	           2    	    case 'X': ftst = OP_FTREXEC;	break;
  2990	           1    	    case 'O': ftst = OP_FTROWNED;	break;
  2991	        2972    	    case 'e': ftst = OP_FTIS;		break;
  2992	           8    	    case 'z': ftst = OP_FTZERO;		break;
  2993	        4464    	    case 's': ftst = OP_FTSIZE;		break;
  2994	        8063    	    case 'f': ftst = OP_FTFILE;		break;
  2995	       12288    	    case 'd': ftst = OP_FTDIR;		break;
  2996	        1740    	    case 'l': ftst = OP_FTLINK;		break;
  2997	           8    	    case 'p': ftst = OP_FTPIPE;		break;
  2998	          11    	    case 'S': ftst = OP_FTSOCK;		break;
  2999	           6    	    case 'u': ftst = OP_FTSUID;		break;
  3000	           5    	    case 'g': ftst = OP_FTSGID;		break;
  3001	          10    	    case 'k': ftst = OP_FTSVTX;		break;
  3002	          10    	    case 'b': ftst = OP_FTBLK;		break;
  3003	         108    	    case 'c': ftst = OP_FTCHR;		break;
  3004	         241    	    case 't': ftst = OP_FTTTY;		break;
  3005	          82    	    case 'T': ftst = OP_FTTEXT;		break;
  3006	         404    	    case 'B': ftst = OP_FTBINARY;	break;
  3007				    case 'M': case 'A': case 'C':
  3008	         314    		gv_fetchpv("\024",TRUE, SVt_PV);
  3009	         314    		switch (tmp) {
  3010	         306    		case 'M': ftst = OP_FTMTIME;	break;
  3011	           4    		case 'A': ftst = OP_FTATIME;	break;
  3012	        