The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
LETTER          [a-zA-Z_@]
DIGIT           [0-9]
DIGITS          {DIGIT}+
HEX             0[xX][0-9A-Fa-f]+
OCT             0[oO][0-7]+
BIN             0[bB][01]+
DOT             [.]
SIGN            [-+]
BIGINT          {SIGN}?{DIGITS}"L"
FLOATNUM        {SIGN}?(({DIGITS}{DOT}{DIGIT}*|{DOT}{DIGITS})([eE]{SIGN}?{DIGITS})?|{DIGITS}[eE]{SIGN}?{DIGITS})
LETTERDIGIT     [a-zA-Z0-9_]
LABELLETTERDIGIT([a-zA-Z0-9_@])
ID              {LETTER}{LABELLETTERDIGIT}*
DQ_STRING       \"(\\.|[^"\\\n])*\"
ENCCHAR         {LETTER}|{DIGIT}|"-"
ENCCHARS        {ENCCHAR}*
ENC             {LETTER}{ENCCHARS}":"
UNICODE         {ENC}{ENC}?{DQ_STRING}
STRINGCONSTANT  {SQ_STRING}|{DQ_STRING}
SQ_STRING       \'[^'\n]*\'
RANKSPEC        \[[,]*\]
EOL        \r?\n
WS              [\t\f\r\x1a ]
SP              [ ]

%x emit
%x macro
%x pod
%x cmt1
%x cmt2
%x cmt3
%x cmt4
%x cmt5
%x heredoc1
%x heredoc2


%%
        /* for emacs "*/
        if (IMCC_INFO(interp)->expect_pasm == 1 && !IMCC_INFO(interp)->in_pod) {
            IMCC_INFO(interp)->expect_pasm = 2;
            BEGIN(emit);
        }

        if (IMCC_INFO(interp)->frames->s.pasm_file && YYSTATE == INITIAL &&
            !IMCC_INFO(interp)->in_pod)
        {
            if (IMCC_INFO(interp)->frames->s.pasm_file == 1) {
                BEGIN(emit);
                return EMIT;
            }

            return 0;
        }

<heredoc1>.*{EOL} {
            IMCC_INFO(interp)->frames->heredoc_rest = mem_sys_strdup(yytext);
            BEGIN(heredoc2);
    }

<heredoc2>{EOL} {
        /* heredocs have highest priority
         * arrange them before all wildcard state matches */

        /* Newline in the heredoc. Realloc and cat on. */
        IMCC_INFO(interp)->line++;
        IMCC_INFO(interp)->heredoc_content =
            (char*)mem_sys_realloc(IMCC_INFO(interp)->heredoc_content,
                            strlen(IMCC_INFO(interp)->heredoc_content) +
                            strlen(yytext) + 2);
        strcpy(IMCC_INFO(interp)->heredoc_content +
               strlen(IMCC_INFO(interp)->heredoc_content), yytext);
    }

<heredoc2>.* {
        /* Are we at the end of the heredoc? */
        if (STREQ(IMCC_INFO(interp)->heredoc_end, yytext)) {
            /* End of the heredoc. */
            yyguts_t * const yyg = (yyguts_t *)yyscanner;
            const int len        = strlen(IMCC_INFO(interp)->heredoc_content);

            /* delim */
            IMCC_INFO(interp)->heredoc_content[len] =
                IMCC_INFO(interp)->heredoc_content[0];

            IMCC_INFO(interp)->heredoc_content[len + 1] = 0;

            mem_sys_free(IMCC_INFO(interp)->heredoc_end);
            IMCC_INFO(interp)->heredoc_end = NULL;

            IMCC_INFO(interp)->frames->buffer = YY_CURRENT_BUFFER;
            valp->s                           =
                IMCC_INFO(interp)->heredoc_content;

            yy_pop_state(yyscanner);
            yy_scan_string(IMCC_INFO(interp)->frames->heredoc_rest, yyscanner);

            /* the EOF rule will increment the line number; decrement here */
            IMCC_INFO(interp)->line--;
            return STRINGC;
        }
        else {
            /* Part of the heredoc. Realloc and cat the line on. */
            IMCC_INFO(interp)->heredoc_content =
                (char *)mem_sys_realloc(IMCC_INFO(interp)->heredoc_content,
                                strlen(IMCC_INFO(interp)->heredoc_content) +
                                strlen(yytext) + 2);
            strcpy(IMCC_INFO(interp)->heredoc_content +
                   strlen(IMCC_INFO(interp)->heredoc_content), yytext);
        }
    }

<cmt2>[^"]+ {
        yy_pop_state(yyscanner);
        yy_push_state(cmt3, yyscanner);

        IMCC_INFO(interp)->frames->s.file = mem_sys_strdup(yytext);
        IMCC_INFO(interp)->cur_unit->file = mem_sys_strdup(yytext);

        return FILECOMMENT;
    }

<cmt3>["] {
        yy_pop_state(yyscanner);
        yy_push_state(cmt4, yyscanner);
    }

<*>setfile{SP}+["] { yy_push_state(cmt2, yyscanner); }

<*>setline{SP}+ { yy_push_state(cmt1, yyscanner);  }

<cmt1>{DIGITS} {
        IMCC_INFO(interp)->line = atoi(yytext);
        yy_pop_state(yyscanner);
        yy_push_state(cmt4, yyscanner);
        return LINECOMMENT;
    }

<cmt4>.*{EOL} {
        yy_pop_state(yyscanner);
        IMCC_INFO(interp)->line++;
    }

<INITIAL,emit>{EOL} {
        if (IMCC_INFO(interp)->expect_pasm == 2)
            BEGIN(INITIAL);

        IMCC_INFO(interp)->expect_pasm = 0;
        IMCC_INFO(interp)->line++;

        return '\n';
    }

<INITIAL,emit># {
        yy_push_state(cmt5, yyscanner);
    }

<cmt5>.*{EOL} {
        if (IMCC_INFO(interp)->expect_pasm == 2)
            BEGIN(INITIAL);
        else
            yy_pop_state(yyscanner);

        IMCC_INFO(interp)->expect_pasm = 0;
        IMCC_INFO(interp)->line++;

        return '\n';
    }


<INITIAL,emit,macro>^"=cut"{EOL} {
    /* this is a stand-alone =cut, but we're not in POD mode, so ignore.  */
    IMCC_INFO(interp)->line++;
}

<INITIAL,emit,macro>^"=" {
        IMCC_INFO(interp)->in_pod = 1;
        yy_push_state(pod, yyscanner);
    }

<pod>^"=cut"{EOL} {
        IMCC_INFO(interp)->in_pod = 0;
        yy_pop_state(yyscanner);
        IMCC_INFO(interp)->line++;
    }

<pod>.*         { /*ignore*/ }

<pod>{EOL}      { IMCC_INFO(interp)->line++; }

<*>".line"               return TK_LINE;
<*>".file"               return TK_FILE;
<INITIAL,emit>".annotate" return ANNOTATE;
<INITIAL,emit>".lex"     return LEXICAL;
".set_arg"               return ARG;
".sub"                   return SUB;
".end"                   return ESUB;
".begin_call"            return PCC_BEGIN;
".end_call"              return PCC_END;
".call"                  return PCC_CALL;
".nci_call"              return NCI_CALL;
".meth_call"             return METH_CALL;
".invocant"              return INVOCANT;
<emit,INITIAL>".pcc_sub" return PCC_SUB;
".begin_return"          return PCC_BEGIN_RETURN;
".end_return"            return PCC_END_RETURN;
".begin_yield"           return PCC_BEGIN_YIELD;
".end_yield"             return PCC_END_YIELD;

<emit,INITIAL>":method"    return METHOD;
<emit,INITIAL>":multi"     return MULTI;
<emit,INITIAL>":main"      return MAIN;
<emit,INITIAL>":load"      return LOAD;
<emit,INITIAL>":init"      return INIT;
<emit,INITIAL>":immediate" return IMMEDIATE;
<emit,INITIAL>":postcomp"  return POSTCOMP;
<emit,INITIAL>":anon"      return ANON;
<emit,INITIAL>":outer"     return OUTER;
<emit,INITIAL>":lex"       return NEED_LEX;
<emit,INITIAL>":vtable"    return VTABLE_METHOD;
<emit,INITIAL>":nsentry"   return NS_ENTRY;
":unique_reg"              return UNIQUE_REG;
":instanceof"              return SUB_INSTANCE_OF;
":subid"                   return SUBID;

".get_result"                  return RESULT;
".get_results"             return GET_RESULTS;
".yield"                   return YIELDT;
".set_yield"               return SET_YIELD;
".return"                  return RETURN;
".set_return"              return SET_RETURN;
".tailcall"                return TAILCALL;
<emit,INITIAL>".loadlib"   return LOADLIB;

":flat"         return ADV_FLAT;
":slurpy"       return ADV_SLURPY;
":optional"     return ADV_OPTIONAL;
":opt_flag"     return ADV_OPT_FLAG;
":named"        return ADV_NAMED;
"=>"            return ADV_ARROW;
":invocant"     return ADV_INVOCANT;
":call_sig"     return ADV_CALL_SIG;

<emit,INITIAL>".namespace"    return NAMESPACE;
<emit,INITIAL>".HLL"          return HLL;

".local"                      return LOCAL;
<emit,INITIAL>".const"        return CONST;
".globalconst"                return GLOBAL_CONST;
".param"                      return PARAM;
"goto"                        return GOTO;
"if"                          return IF;
"unless"                      return UNLESS;
"null"                        return PNULL;
"int"                         return INTV;
"num"                         return FLOATV;

"pmc"                         return PMCV;
"string"                      return STRINGV;
"<<"                          return SHIFT_LEFT;
">>"                          return SHIFT_RIGHT;
">>>"                         return SHIFT_RIGHT_U;
"&&"                          return LOG_AND;
"||"                          return LOG_OR;
"~~"                          return LOG_XOR;
"<"                           return RELOP_LT;
"<="                          return RELOP_LTE;
">"                           return RELOP_GT;
">="                          return RELOP_GTE;
"=="                          return RELOP_EQ;
"!="                          return RELOP_NE;
"**"                          return POW;

{WS}+"."{WS}+          return CONCAT;
"."                    return DOT;

"+="                   return PLUS_ASSIGN;
"-="                   return MINUS_ASSIGN;
"*="                   return MUL_ASSIGN;
"/="                   return DIV_ASSIGN;
"%="                   return MOD_ASSIGN;
"//"                   return FDIV;
"//="                  return FDIV_ASSIGN;
"&="                   return BAND_ASSIGN;
"|="                   return BOR_ASSIGN;
"~="                   return BXOR_ASSIGN;
">>="                  return SHR_ASSIGN;
"<<="                  return SHL_ASSIGN;
">>>="                 return SHR_U_ASSIGN;
".="                   return CONCAT_ASSIGN;

<emit,INITIAL>".macro_const" {
        char *macro_name   = NULL;
        int   start_cond   = YY_START;
        int   macro_exists = 0;
        int   c;
        int   start_line;

        BEGIN(macro);
        c = yylex_skip(valp, interp, " ", yyscanner);

        if (c != IDENTIFIER)
            IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
               "Constant names must be identifiers");

        IMCC_INFO(interp)->cur_macro_name = macro_name = valp->s;
        start_line                        = IMCC_INFO(interp)->line;

        c = yylex_skip(valp, interp, " ", yyscanner);

        if (c != INTC && c != FLOATC && c != STRINGC && c != REG)
            IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
                "Constant '%s' value must be a number, "
                "stringliteral or register", macro_name);

        /* macro_name becomes a hash key
         * the first one needs to remain; destroying the hash frees it
         * subsequent macro_names need destruction here to avoid leaks */
        if (find_macro(interp, macro_name))
            macro_exists = 1;

        define_macro(interp, macro_name, NULL, valp->s, start_line);
        mem_sys_free(valp->s);

        /* don't leak these */
        if (macro_exists)
            mem_sys_free(macro_name);

        IMCC_INFO(interp)->cur_macro_name = NULL;

        BEGIN(start_cond);
        return MACRO;
    }

<emit,INITIAL>".macro" {
        return read_macro(valp, interp, yyscanner);
    }

<emit,INITIAL>".include" {
        const int c = yylex(valp, yyscanner, interp);

        if (c != STRINGC)
            return c;

        /* STRINGCs have a mem_sys_strdup()ed valp->s */
        mem_sys_free(valp->s);
        YYCHOP();
        include_file(interp, yytext + 1, yyscanner);
    }

<emit,INITIAL>{ID}"$:" {
        if (valp) {
            char *label;
            size_t len;

            YYCHOP();
            YYCHOP();

            if (!IMCC_INFO(interp)->frames || !IMCC_INFO(interp)->frames->label)
                    IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR, "missing space?");

            len = yyleng + 10;
            label = (char *)mem_sys_allocate(len);
            snprintf(label, len, "%s%d", yytext, IMCC_INFO(interp)->frames->label);

            /* XXX: free valp->s if it exists? */
            valp->s = label;
        }

        return LABEL;
    }

<emit,INITIAL>{ID}"$" {

        if (valp) {
            char *label;
            size_t len;
            YYCHOP();

            if (!IMCC_INFO(interp)->frames || !IMCC_INFO(interp)->frames->label)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR, "missing space?");

            len = yyleng + 10;
            label = (char *)mem_sys_allocate(len);
            snprintf(label, len, "%s%d", yytext, IMCC_INFO(interp)->frames->label);

            /* XXX: free valp->s if it exists? */
            valp->s = label;
        }

        return IDENTIFIER;
     }

<emit,INITIAL>","             return COMMA;

<emit,INITIAL>{ID}":" {
        /* trim last ':' */
        YYCHOP();

        if (valp)
            valp->s = mem_sys_strdup(yytext);

        return LABEL;
    }

<emit,INITIAL>{DOT}{LETTER}{LETTERDIGIT}* {
        char   * const macro_name = mem_sys_strdup(yytext + 1);
        int failed = expand_macro(interp, macro_name, yyscanner);
        mem_sys_free(macro_name);
        if (!failed) {
            yyless(1);
            return DOT;
        }
    }



<*>{FLOATNUM}         DUP_AND_RET(valp, FLOATC);
<*>{SIGN}?{DIGIT}+    DUP_AND_RET(valp, INTC);
<*>{HEX}              DUP_AND_RET(valp, INTC);
<*>{BIN}              DUP_AND_RET(valp, INTC);
<*>{OCT}              DUP_AND_RET(valp, INTC);

<*>{BIGINT} {
        valp->s = mem_sys_strdup(yytext);

        /* trailing 'L' */
        valp->s[strlen(valp->s) - 1] = '\0';

        /* no BIGINT native format yet */
        return STRINGC;
    }

<*>{STRINGCONSTANT} {
        valp->s = mem_sys_strdup(yytext);

        return STRINGC;
    }

<*>"<<"{STRINGCONSTANT} {
        macro_frame_t *frame;

        /* Save the string we want to mark the end of the heredoc and snip
           off newline and quote. */
        if (IMCC_INFO(interp)->frames->heredoc_rest)
            IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR, "nested heredoc not supported");
        IMCC_INFO(interp)->heredoc_end = mem_sys_strdup(yytext + 3);
        IMCC_INFO(interp)->heredoc_end[strlen(IMCC_INFO(interp)->heredoc_end) - 1] = 0;

        if (!strlen(IMCC_INFO(interp)->heredoc_end))
            IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR, "empty heredoc delimiter");

        frame                     = new_frame(interp);
        frame->s.next             = (parser_state_t *)IMCC_INFO(interp)->frames;
        IMCC_INFO(interp)->frames = frame;

        /* Start slurping up the heredoc. */
        IMCC_INFO(interp)->heredoc_content    = (char *)mem_sys_allocate(2);

        /* preserve delim */
        IMCC_INFO(interp)->heredoc_content[0] = yytext[2];

        /* eos */
        IMCC_INFO(interp)->heredoc_content[1] = 0;
        yy_push_state(heredoc1, yyscanner);
    }

<*>{UNICODE} {
        /* charset:"..." */
        valp->s = mem_sys_strdup(yytext);

        /* this is actually not unicode but a string with a charset */
        return USTRINGC;
    }

<emit,INITIAL>\$I[0-9]+ {
        if (valp) (valp)->s = yytext;
        if (IMCC_INFO(interp)->state->pasm_file)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
                    "'%s' is not a valid register name in pasm mode", yytext);
        return IREG;
    }

<emit,INITIAL>\$N[0-9]+ {
        if (valp) (valp)->s = yytext;
        if (IMCC_INFO(interp)->state->pasm_file)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
                    "'%s' is not a valid register name in pasm mode", yytext);
        return NREG;
    }

<emit,INITIAL>\$S[0-9]+ {
        if (valp) (valp)->s = yytext;
        if (IMCC_INFO(interp)->state->pasm_file)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
                    "'%s' is not a valid register name in pasm mode", yytext);
        return SREG;
    }

<emit,INITIAL>\$P[0-9]+ {
        if (valp) (valp)->s = yytext;
        if (IMCC_INFO(interp)->state->pasm_file)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
                    "'%s' is not a valid register name in pasm mode", yytext);
        return PREG;
    }

<emit,INITIAL>\$[a-zA-Z0-9]+ {
        IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
            "'%s' is not a valid register name", yytext);
    }

<emit,INITIAL>[ISNP]{DIGIT}{DIGIT}? {
        if (IMCC_INFO(interp)->state->pasm_file == 0)
            IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR,
            "'%s' is only a valid register name in PASM mode", yytext);

        if (valp)
            valp->s = mem_sys_strdup(yytext);

        return REG;
    }

<emit,INITIAL>{ID} {
        if (!IMCC_INFO(interp)->is_def) {
            SymReg *r = find_sym(interp, yytext);

            if (r && (r->type & (VTIDENTIFIER|VT_CONSTP))) {
                valp->sr = r;
                return VAR;
            }

            if (IMCC_INFO(interp)->cur_unit
            &&  IMCC_INFO(interp)->cur_unit->instructions
            && (r = IMCC_INFO(interp)->cur_unit->instructions->symregs[0])
            &&  r->pcc_sub)
            {
                if (((r->pcc_sub->pragma & P_METHOD)
                ||   (IMCC_INFO(interp)->cur_unit->is_vtable_method))
                &&   !strcmp(yytext, "self")) {
                    valp->sr = mk_ident(interp, "self", 'P');
                    IMCC_INFO(interp)->cur_unit->type |= IMC_HAS_SELF;
                    return VAR;
                }
            }
        }

        valp->s = mem_sys_strdup(yytext);
        return (!IMCC_INFO(interp)->is_def && is_op(interp, valp->s) ? PARROT_OP : IDENTIFIER);
    }


<emit,INITIAL>{WS}+ /* skip */;

<emit,cmt1,cmt2,cmt3,cmt4,cmt5,INITIAL>. {
        /* catch all except for state macro */
        return yytext[0];
    }

<emit><<EOF>> {
        BEGIN(INITIAL);

        if (IMCC_INFO(interp)->frames->s.pasm_file) {
            IMCC_INFO(interp)->frames->s.pasm_file = 2;
            return EOM;
        }

        return 0;
    }

<INITIAL><<EOF>> yyterminate();

<macro>".endm"         DUP_AND_RET(valp, ENDM);

<macro>{WS}*{EOL} {
        IMCC_INFO(interp)->line++;
        DUP_AND_RET(valp, '\n');
    }

<macro>"$"{ID}":"  return LABEL;

<macro>".label"{WS}+ {

        if (yylex(valp, yyscanner, interp) != LABEL)
                IMCC_fataly(interp, EXCEPTION_SYNTAX_ERROR, "LABEL expected");

        if (valp) {
            char *label;
            size_t len;
            YYCHOP();

            len = strlen(IMCC_INFO(interp)->cur_macro_name) + yyleng + 15;
            label = (char *)mem_sys_allocate(len);

            snprintf(label, len, "local__%s__%s__$:",
                IMCC_INFO(interp)->cur_macro_name, yytext+1);

            if (valp->s)
                mem_sys_free(valp->s);
            valp->s = label;
        }

        return LABEL;
    }

<macro>".$"{ID} {
        if (valp) {
            const size_t len = strlen(IMCC_INFO(interp)->cur_macro_name) + yyleng + 12;
            char * const label = (char *)mem_sys_allocate(len);

            snprintf(label, len, "local__%s__%s__$",
                IMCC_INFO(interp)->cur_macro_name, yytext+2);

            if (valp->s)
                mem_sys_free(valp->s);
            valp->s = label;
        }

        return IDENTIFIER;
    }

<macro>^{WS}+                       /* skip leading ws */;
<macro>{WS}+                        DUP_AND_RET(valp, ' ');
<macro>[SNIP]{DIGITS}               DUP_AND_RET(valp, REG);
<macro>"$"[SNIP]{DIGITS}            DUP_AND_RET(valp, REG);
<macro>{ID}                         DUP_AND_RET(valp, IDENTIFIER);
<macro>{DOT}{ID}                    DUP_AND_RET(valp, MACRO);
<macro>.                            DUP_AND_RET(valp, yytext[0]);
<macro><<EOF>>                      yyterminate();

%%