@@ -1,5 +1,15 @@
Revision history for Perl extension Compiler-Lexer.
+0.22 2015-02-22T02:59:18Z
+ - supported new syntax from 5.20.0
+ - fixed parsing bugs (format/v-string/variable)
+
+0.21 2015-01-26T05:59:24Z
+ - added authority
+
+0.20 2015-01-26T03:17:35Z
+ - support HereDocumentBareTag (e.g. <<BARE)
+
0.19 2014-07-23T07:32:47Z
- fixed some issues (foo'bar and q'foobar' and so on)
@@ -42,6 +42,12 @@ t/issue_35.t
t/issue_38.t
t/issue_39.t
t/issue_40.t
+t/issue_42.t
+t/issue_43.t
+t/issue_44.t
+t/issue_45.t
+t/issue_48.t
+t/issue_53.t
t/issue_reports.t
t/package.t
t/perl6.t
@@ -4,7 +4,7 @@
"Masaaki Goshima (goccy) <goccy(at)cpan.org>"
],
"dynamic_config" : 0,
- "generated_by" : "Minilla/v2.1.1",
+ "generated_by" : "Minilla/v2.3.0",
"license" : [
"perl_5"
],
@@ -17,7 +17,8 @@
"directory" : [
"t",
"examples",
- "builder"
+ "builder",
+ "experiments"
]
},
"prereqs" : {
@@ -52,14 +53,14 @@
},
"test" : {
"requires" : {
- "Test::More" : "0"
+ "Test::More" : "0.95"
}
}
},
"provides" : {
"Compiler::Lexer" : {
"file" : "lib/Compiler/Lexer.pm",
- "version" : "0.19"
+ "version" : "0.22"
},
"Compiler::Lexer::Kind" : {
"file" : "lib/Compiler/Lexer/Constants.pm"
@@ -85,15 +86,17 @@
"web" : "https://github.com/goccy/p5-Compiler-Lexer"
}
},
- "version" : "0.19",
+ "version" : "0.22",
+ "x_authority" : "cpan:GOCCY",
"x_contributors" : [
"tokuhirom <tokuhirom@gmail.com>",
"Reini Urban <rurban@cpanel.net>",
"Fumihiro Itoh <fmhrit@gmail.com>",
"Masaaki Goshima <masaaki.goshima@mixi.co.jp>",
- "moznion <moznion@gmail.com>",
"Olivier Mengué <dolmen@cpan.org>",
"Syohei YOSHIDA <syohex@gmail.com>",
- "goccy <goccy54@gmail.com>"
+ "brian d foy <brian.d.foy@gmail.com>",
+ "moznion <moznion@gmail.com>",
+ "Masaaki Goshima <goccy54@gmail.com>"
]
}
@@ -6,12 +6,12 @@ build_requires:
Devel::PPPort: '3.19'
ExtUtils::MakeMaker: '6.59'
ExtUtils::ParseXS: '2.21'
- Test::More: '0'
+ Test::More: '0.95'
configure_requires:
Module::Build: '0.38'
Module::Build::XSUtil: '0.02'
dynamic_config: 0
-generated_by: 'Minilla/v2.1.1, CPAN::Meta::Converter version 2.141520'
+generated_by: 'Minilla/v2.3.0, CPAN::Meta::Converter version 2.143240'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -22,10 +22,11 @@ no_index:
- t
- examples
- builder
+ - experiments
provides:
Compiler::Lexer:
file: lib/Compiler/Lexer.pm
- version: '0.19'
+ version: '0.22'
Compiler::Lexer::Kind:
file: lib/Compiler/Lexer/Constants.pm
Compiler::Lexer::SyntaxType:
@@ -41,13 +42,15 @@ resources:
bugtracker: https://github.com/goccy/p5-Compiler-Lexer/issues
homepage: https://github.com/goccy/p5-Compiler-Lexer
repository: git://github.com/goccy/p5-Compiler-Lexer.git
-version: '0.19'
+version: '0.22'
+x_authority: cpan:GOCCY
x_contributors:
- 'tokuhirom <tokuhirom@gmail.com>'
- 'Reini Urban <rurban@cpanel.net>'
- 'Fumihiro Itoh <fmhrit@gmail.com>'
- 'Masaaki Goshima <masaaki.goshima@mixi.co.jp>'
- - 'moznion <moznion@gmail.com>'
- 'Olivier Mengué <dolmen@cpan.org>'
- 'Syohei YOSHIDA <syohex@gmail.com>'
- - 'goccy <goccy54@gmail.com>'
+ - 'brian d foy <brian.d.foy@gmail.com>'
+ - 'moznion <moznion@gmail.com>'
+ - 'Masaaki Goshima <goccy54@gmail.com>'
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/goccy/p5-Compiler-Lexer.png?branch=master)](https://travis-ci.org/goccy/p5-Compiler-Lexer) [![Coverage Status](https://coveralls.io/repos/goccy/p5-Compiler-Lexer/badge.png?branch=master)](https://coveralls.io/r/goccy/p5-Compiler-Lexer?branch=master)
+[![Build Status](https://travis-ci.org/goccy/p5-Compiler-Lexer.svg?branch=master)](https://travis-ci.org/goccy/p5-Compiler-Lexer) [![Coverage Status](https://img.shields.io/coveralls/goccy/p5-Compiler-Lexer/master.svg)](https://coveralls.io/r/goccy/p5-Compiler-Lexer?branch=master)
# NAME
Compiler::Lexer - Lexical Analyzer for Perl5
@@ -26,7 +26,7 @@ Compiler::Lexer - Lexical Analyzer for Perl5
create new instance.
You can create object from $options in hash reference.
- __options list__
+ **options list**
- filename
- verbose : includes token of Pod, Comment and WhiteSpace
@@ -13,5 +13,5 @@ on 'build' => sub {
};
on 'test' => sub {
- requires 'Test::More';
+ requires 'Test::More', '0.95';
};
@@ -98,36 +98,6 @@ DISTRIBUTIONS
Test::Builder::Module 0
Test::More 0.61
perl 5.008_001
- YAML-0.84
- pathname: M/MS/MSTROUT/YAML-0.84.tar.gz
- provides:
- Test::YAML 0.84
- Test::YAML::Filter undef
- YAML 0.84
- YAML::Any 0.84
- YAML::Dumper 0.84
- YAML::Dumper::Base 0.84
- YAML::Error 0.84
- YAML::Loader 0.84
- YAML::Loader::Base 0.84
- YAML::Marshall 0.84
- YAML::Mo undef
- YAML::Node 0.84
- YAML::Tag 0.84
- YAML::Type::blessed undef
- YAML::Type::code undef
- YAML::Type::glob undef
- YAML::Type::ref undef
- YAML::Type::regexp undef
- YAML::Type::undef undef
- YAML::Types 0.84
- YAML::Warning undef
- yaml_mapping undef
- yaml_scalar undef
- yaml_sequence undef
- requirements:
- ExtUtils::MakeMaker 6.59
- perl 5.008001
YAML-LibYAML-0.41
pathname: I/IN/INGY/YAML-LibYAML-0.41.tar.gz
provides:
@@ -55,13 +55,13 @@ token_type:
T_AndBitEqual: 54
T_AndEqual: 58
T_Annotation: 135
- T_Argument: 204
+ T_Argument: 205
T_ArgumentArray: 136
- T_Array: 187
- T_ArrayAt: 198
+ T_Array: 188
+ T_ArrayAt: 199
T_ArrayDereference: 113
- T_ArrayRef: 196
- T_ArraySet: 200
+ T_ArrayRef: 197
+ T_ArraySet: 201
T_ArraySize: 66
T_ArraySizeDereference: 121
T_ArrayVar: 168
@@ -75,7 +75,7 @@ token_type:
T_Break: 88
T_BuiltinFunc: 70
T_CORE: 77
- T_Call: 203
+ T_Call: 204
T_CallDecl: 130
T_Class: 129
T_CodeDereference: 116
@@ -83,14 +83,14 @@ token_type:
T_CodeVar: 167
T_Colon: 105
T_Comma: 104
- T_Comment: 208
+ T_Comment: 209
T_Compare: 33
T_ConstValue: 138
T_Continue: 86
T_DESTROY: 78
T_DataWord: 74
T_Dec: 46
- T_Default: 206
+ T_Default: 207
T_DefaultEqual: 49
T_DefaultOperator: 60
T_DefaultStmt: 103
@@ -108,30 +108,31 @@ token_type:
T_FieldDecl: 158
T_ForStmt: 133
T_ForeachStmt: 134
- T_Format: 183
- T_FormatDecl: 182
- T_FormatEnd: 184
- T_Function: 202
+ T_Format: 184
+ T_FormatDecl: 183
+ T_FormatEnd: 185
+ T_Function: 203
T_FunctionDecl: 63
T_GivenStmt: 102
T_Glob: 11
- T_GlobalArrayVar: 194
- T_GlobalHashVar: 195
- T_GlobalVar: 193
+ T_GlobalArrayVar: 195
+ T_GlobalHashVar: 196
+ T_GlobalVar: 194
T_GlobalVarDecl: 162
T_Goto: 85
T_Greater: 7
T_GreaterEqual: 29
T_Handle: 89
T_HandleDelim: 152
- T_Hash: 188
- T_HashAt: 199
+ T_Hash: 189
+ T_HashAt: 200
T_HashDereference: 114
- T_HashRef: 197
- T_HashSet: 201
+ T_HashRef: 198
+ T_HashSet: 202
T_HashVar: 169
- T_HereDocument: 180
- T_HereDocumentEnd: 181
+ T_HereDocument: 181
+ T_HereDocumentBareTag: 179
+ T_HereDocumentEnd: 182
T_HereDocumentExecTag: 178
T_HereDocumentRawTag: 177
T_HereDocumentTag: 176
@@ -152,11 +153,11 @@ token_type:
T_Less: 8
T_LessEqual: 30
T_LibraryDirectories: 140
- T_List: 205
- T_LocalArrayVar: 191
+ T_List: 206
+ T_LocalArrayVar: 192
T_LocalDecl: 90
- T_LocalHashVar: 192
- T_LocalVar: 190
+ T_LocalHashVar: 193
+ T_LocalVar: 191
T_LocalVarDecl: 161
T_Method: 64
T_Mod: 5
@@ -172,20 +173,28 @@ token_type:
T_Not: 68
T_NotBitEqual: 56
T_NotEqual: 37
- T_Object: 185
- T_Operator: 189
+ T_Object: 186
+ T_Operator: 190
T_Or: 53
T_OrBitEqual: 55
T_OrEqual: 57
T_OurDecl: 91
T_Package: 128
- T_Pod: 207
+ T_Pod: 208
T_Pointer: 125
T_PolymorphicCompare: 34
+ T_PostDeref: 212
+ T_PostDerefArraySliceCloseBracket: 215
+ T_PostDerefArraySliceOpenBracket: 214
+ T_PostDerefCodeCloseParen: 219
+ T_PostDerefCodeOpenParen: 218
+ T_PostDerefHashSliceCloseBrace: 217
+ T_PostDerefHashSliceOpenBrace: 216
+ T_PostDerefStar: 213
T_PowerEqual: 48
T_ProgramArgument: 139
T_Prototype: 165
- T_RawHereDocument: 179
+ T_RawHereDocument: 180
T_RawString: 173
T_Redo: 82
T_Ref: 10
@@ -194,7 +203,7 @@ token_type:
T_RegDelim: 151
T_RegDoubleQuote: 146
T_RegExec: 148
- T_RegExp: 186
+ T_RegExp: 187
T_RegList: 147
T_RegMatch: 150
T_RegMiddleDelim: 153
@@ -244,7 +253,7 @@ token_type:
T_ThreeTermOperator: 6
T_ToDo: 61
T_TypeRef: 159
- T_Undefined: 210
+ T_Undefined: 211
T_UnlessStmt: 99
T_UntilStmt: 100
T_UseDecl: 93
@@ -254,4 +263,4 @@ token_type:
T_VersionString: 175
T_WhenStmt: 101
T_WhileStmt: 132
- T_WhiteSpace: 209
+ T_WhiteSpace: 210
@@ -606,6 +606,7 @@ Term VersionString
Term HereDocumentTag
Term HereDocumentRawTag
Term HereDocumentExecTag
+Term HereDocumentBareTag
Term RawHereDocument
Term HereDocument
Term HereDocumentEnd
@@ -637,4 +638,12 @@ Term Default undef
Verbose Pod
Verbose Comment
Verbose WhiteSpace
-Undefined Undefined
+Symbol PostDeref
+Symbol PostDerefStar
+Symbol PostDerefArraySliceOpenBracket
+Symbol PostDerefArraySliceCloseBracket
+Symbol PostDerefHashSliceOpenBrace
+Symbol PostDerefHashSliceCloseBrace
+Symbol PostDerefCodeOpenParen
+Symbol PostDerefCodeCloseParen
+Undefined Undefined
@@ -181,6 +181,7 @@ typedef enum {
HereDocumentTag,
HereDocumentRawTag,
HereDocumentExecTag,
+ HereDocumentBareTag,
RawHereDocument,
HereDocument,
HereDocumentEnd,
@@ -212,7 +213,15 @@ typedef enum {
Pod,
Comment,
WhiteSpace,
- Undefined
+ Undefined,
+ PostDeref,
+ PostDerefStar,
+ PostDerefArraySliceOpenBracket,
+ PostDerefArraySliceCloseBracket,
+ PostDerefHashSliceOpenBrace,
+ PostDerefHashSliceCloseBrace,
+ PostDerefCodeOpenParen,
+ PostDerefCodeCloseParen
} Type;
}
@@ -191,7 +191,7 @@ public:
bool isRegexStarted;
bool isPrototypeStarted;
bool isFormatStarted;
- bool isFormatDeclared;
+ Token *formatDeclaredToken;
bool commentFlag;
bool hereDocumentFlag;
bool skipFlag;
@@ -216,10 +216,13 @@ public:
bool isRegexEndDelim(LexContext *ctx);
bool isRegexDelim(Token *prev_token, char symbol);
bool isHereDocument(LexContext *ctx, Token *prev_token);
+ bool isPostDeref(LexContext *ctx);
bool isFormat(LexContext *ctx, Token *tk);
bool isVersionString(LexContext *ctx);
bool isSkip(LexContext *ctx);
bool isPrototype(LexContext *ctx);
+ bool isRegexOptionPrevToken(LexContext *ctx);
+ bool isRegexOption(const char *opt);
char getRegexDelim(LexContext *ctx);
Token *scanQuote(LexContext *ctx, char quote);
Token *scanNewLineKeyword(LexContext *ctx);
@@ -228,6 +231,7 @@ public:
Token *scanCurSymbol(LexContext *ctx, char symbol);
Token *scanDoubleCharacterOperator(LexContext *ctx, char symbol, char next_ch);
Token *scanTripleCharacterOperator(LexContext *ctx, char symbol, char next_ch, char after_next_ch);
+ Token *scanPostDeref(LexContext *ctx);
Token *scanSymbol(LexContext *ctx);
Token *scanWordDelimiter(LexContext *ctx);
Token *scanReference(LexContext *ctx);
@@ -182,38 +182,47 @@ use constant {
T_HereDocumentTag => 176,
T_HereDocumentRawTag => 177,
T_HereDocumentExecTag => 178,
- T_RawHereDocument => 179,
- T_HereDocument => 180,
- T_HereDocumentEnd => 181,
- T_FormatDecl => 182,
- T_Format => 183,
- T_FormatEnd => 184,
- T_Object => 185,
- T_RegExp => 186,
- T_Array => 187,
- T_Hash => 188,
- T_Operator => 189,
- T_LocalVar => 190,
- T_LocalArrayVar => 191,
- T_LocalHashVar => 192,
- T_GlobalVar => 193,
- T_GlobalArrayVar => 194,
- T_GlobalHashVar => 195,
- T_ArrayRef => 196,
- T_HashRef => 197,
- T_ArrayAt => 198,
- T_HashAt => 199,
- T_ArraySet => 200,
- T_HashSet => 201,
- T_Function => 202,
- T_Call => 203,
- T_Argument => 204,
- T_List => 205,
- T_Default => 206,
- T_Pod => 207,
- T_Comment => 208,
- T_WhiteSpace => 209,
- T_Undefined => 210
+ T_HereDocumentBareTag => 179,
+ T_RawHereDocument => 180,
+ T_HereDocument => 181,
+ T_HereDocumentEnd => 182,
+ T_FormatDecl => 183,
+ T_Format => 184,
+ T_FormatEnd => 185,
+ T_Object => 186,
+ T_RegExp => 187,
+ T_Array => 188,
+ T_Hash => 189,
+ T_Operator => 190,
+ T_LocalVar => 191,
+ T_LocalArrayVar => 192,
+ T_LocalHashVar => 193,
+ T_GlobalVar => 194,
+ T_GlobalArrayVar => 195,
+ T_GlobalHashVar => 196,
+ T_ArrayRef => 197,
+ T_HashRef => 198,
+ T_ArrayAt => 199,
+ T_HashAt => 200,
+ T_ArraySet => 201,
+ T_HashSet => 202,
+ T_Function => 203,
+ T_Call => 204,
+ T_Argument => 205,
+ T_List => 206,
+ T_Default => 207,
+ T_Pod => 208,
+ T_Comment => 209,
+ T_WhiteSpace => 210,
+ T_Undefined => 211,
+ T_PostDeref => 212,
+ T_PostDerefStar => 213,
+ T_PostDerefArraySliceOpenBracket => 214,
+ T_PostDerefArraySliceCloseBracket => 215,
+ T_PostDerefHashSliceOpenBrace => 216,
+ T_PostDerefHashSliceCloseBrace => 217,
+ T_PostDerefCodeOpenParen => 218,
+ T_PostDerefCodeCloseParen => 219
};
package Compiler::Lexer::SyntaxType;
@@ -11,7 +11,7 @@ our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw() ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw();
-our $VERSION = '0.19';
+our $VERSION = '0.22';
require XSLoader;
XSLoader::load(__PACKAGE__, $VERSION);
@@ -1,10 +1,11 @@
badges = ["travis", "coveralls"]
+authority = "cpan:GOCCY"
[build]
build_class = "builder::MyBuilder"
[no_index]
-directory = ['t', 'examples', 'builder']
+directory = ['t', 'examples', 'builder', 'experiments']
[FileGatherer]
-exclude_match = ['^t/perl/.*', '^example/bigdata.pl']
+exclude_match = ['^t/perl/.*', '^example/bigdata.pl', '^experiments/' ]
@@ -6,6 +6,7 @@ namespace TokenType = Enum::Token::Type;
namespace SyntaxType = Enum::Parser::Syntax;
namespace TokenKind = Enum::Token::Kind;
#define ITER_CAST(T, it) (T)*(it)
+#define EXTEND_BUFFER_SIZE (16) // extra buffer slack to avoid overrun; see issue #53
Module::Module(const char *name_, const char *args_)
: name(name_), args(args_) {}
@@ -14,12 +15,12 @@ LexContext::LexContext(const char *filename, char *script)
: progress(0), buffer_idx(0)
{
script_size = strlen(script) + 1;
- token_buffer = (char *)malloc((script_size + 1) * 2);
+ token_buffer = (char *)malloc((script_size + EXTEND_BUFFER_SIZE) * 2);
buffer_head = token_buffer;
token_buffer[0] = EOL;
prev_type = TokenType::Undefined;
smgr = new ScriptManager(script);
- tmgr = new TokenManager(script_size);
+ tmgr = new TokenManager(script_size + EXTEND_BUFFER_SIZE);
finfo.start_line_num = 1;
finfo.filename = filename;
}
@@ -46,6 +47,7 @@ Tokens *Lexer::tokenize(char *script)
char ch = smgr->currentChar();
for (; ch != EOL; smgr->idx++) {
ch = smgr->currentChar();
+ if (smgr->end()) break;
if (ch == '\n') ctx->finfo.start_line_num++;
if (scanner.isSkip(ctx)) {
continue;
@@ -86,17 +88,43 @@ Tokens *Lexer::tokenize(char *script)
continue;
}
//fall through
- case '=': case '^': case '~': case '@':
+
+ case '$': case '@': case '%': case '&': case '*': // all of the sigils
+ if (scanner.isPostDeref(ctx)) {
+ tk = scanner.scanPostDeref(ctx);
+ tmgr->add(tk);
+ } else {
+ tmgr->add(scanner.scanSymbol(ctx));
+ }
+ smgr->idx += ctx->progress;
+ ctx->progress = 0;
+ break;
+ case '=': case '^': case '~':
case ',': case ':': case ';': case '+':
- case '<': case '>': case '&': case '|':
- case '!': case '*': case '/': case '%':
+ case '<': case '>': case '|':
+ case '!': case '/':
case '(': case ')': case '{': case '}':
- case '[': case ']': case '?': case '$':
+ case '[': case ']': case '?':
case '\\':
tmgr->add(scanner.scanSymbol(ctx));
smgr->idx += ctx->progress;
ctx->progress = 0;
break;
+ case 'x': {
+ char next_ch = smgr->nextChar();
+ char after_next_ch = smgr->afterNextChar();
+ if (next_ch != '=' || ctx->existsBuffer() ||
+ after_next_ch == '=' ||
+ after_next_ch == '>' ||
+ after_next_ch == '~') {
+ ctx->writeBuffer(ch);
+ } else {
+ tmgr->add(scanner.scanSymbol(ctx));
+ smgr->idx += ctx->progress;
+ ctx->progress = 0;
+ }
+ break;
+ }
case '\n':
tmgr->add(scanner.scanLineDelimiter(ctx));
tmgr->add(scanner.scanWhiteSpace(ctx)); // For newline character
@@ -229,14 +257,14 @@ void Lexer::prepare(Tokens *tokens)
Token *t = ITER_CAST(Token *, it);
switch (t->info.type) {
case TokenType::HereDocumentTag: case TokenType::HereDocumentRawTag:
- case TokenType::HereDocumentExecTag:
+ case TokenType::HereDocumentExecTag: case TokenType::HereDocumentBareTag:
tag_pos = it;
break;
case TokenType::HereDocument: {
assert(tag_pos != start_pos && "ERROR!: nothing use HereDocumentTag");
Token *tag = ITER_CAST(Token *, tag_pos);
switch (tag->info.type) {
- case TokenType::HereDocumentTag:
+ case TokenType::HereDocumentTag: case TokenType::HereDocumentBareTag:
tag->info.type = Enum::Token::Type::RegDoubleQuote;
tag->info.kind = Enum::Token::Kind::RegPrefix;
tag->info.name = "RegDoubleQuote";
@@ -6,9 +6,9 @@ namespace TokenKind = Enum::Token::Kind;
Scanner::Scanner() :
isStringStarted(false), isRegexStarted(false), isPrototypeStarted(false), isFormatStarted(false),
- isFormatDeclared(false), commentFlag(false), hereDocumentFlag(false), skipFlag(false),
+ formatDeclaredToken(NULL), commentFlag(false), hereDocumentFlag(false), skipFlag(false),
regex_delim(0), regex_middle_delim(0),
- brace_count_inner_regex(0), bracket_count_inner_regex(0), cury_brace_count_inner_regex(0)
+ brace_count_inner_regex(0), bracket_count_inner_regex(0), cury_brace_count_inner_regex(0)
{
const char *regex_prefixes[] = {
"q", "qq", "qw", "qx", "qr", "m", NULL
@@ -69,7 +69,7 @@ Token *Scanner::scanQuote(LexContext *ctx, char quote)
namespace_tk->info = tmgr->getTokenInfo(TokenType::Namespace);
tmgr->add(namespace_tk);
ctx->clearBuffer();
-
+
ctx->writeBuffer(cur_ch);
Token *namespace_resolver = tmgr->new_Token(ctx->buffer(), ctx->finfo);
namespace_resolver->info = tmgr->getTokenInfo(TokenType::NamespaceResolver);
@@ -157,8 +157,19 @@ bool Scanner::scanNegativeNumber(LexContext *ctx, char number)
if (number != EOL) {
num_buffer[0] = number;
if (atoi(num_buffer) > 0 || number == '0') {
- //negative number
- ctx->writeBuffer('-');
+ if (ctx->existsBuffer()) {
+ ctx->tmgr->add(ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo));
+ ctx->clearBuffer();
+ //sub operator
+ ctx->writeBuffer('-');
+ Token *sub_operator = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
+ sub_operator->info = ctx->tmgr->getTokenInfo(TokenType::Sub);
+ ctx->clearBuffer();
+ ctx->tmgr->add(sub_operator);
+ } else {
+ //negative number
+ ctx->writeBuffer('-');
+ }
return true;
}
}
@@ -265,7 +276,7 @@ bool Scanner::isRegexDelim(Token *prev_token, char symbol)
(symbol != '-' && symbol != '=' && symbol != ',' && symbol != ')') &&
regex_prefix_map.find(prev_tk) != regex_prefix_map.end()) {
return true;
- } else if (regex_delim == 0 && prev_token &&
+ } else if (regex_delim == 0 && prev_token &&
(prev_token->info.kind == TokenKind::RegPrefix || prev_token->info.kind == TokenKind::RegReplacePrefix)) {
return true;
}
@@ -275,7 +286,7 @@ bool Scanner::isRegexDelim(Token *prev_token, char symbol)
prev_type == TokenType::ExecString) return false;
if (symbol != '/') return false;
if (!prev_token) return true;
- if (symbol == '/' && (prev_tk == "xor" || prev_tk == "and")) return true;
+ if (symbol == '/' && (prev_tk == "xor" || prev_tk == "and" || prev_tk == "not" || prev_tk == "or")) return true;
if (strtod(prev_data, NULL)) return false;
if (prev_tk == "0") return false;
if (enable_regex_argument_func_map.find(prev_tk) != enable_regex_argument_func_map.end()) return true;
@@ -323,7 +334,7 @@ Token *Scanner::scanPrevSymbol(LexContext *ctx, char )
ret = ctx->tmgr->new_Token(token, ctx->finfo);
here_document_tag = string(token);
here_document_tag_tk = ret;
- ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
+ ret->info = tmgr->getTokenInfo(TokenType::HereDocumentBareTag);
} else {
ret = ctx->tmgr->new_Token(token, ctx->finfo);
}
@@ -332,6 +343,40 @@ Token *Scanner::scanPrevSymbol(LexContext *ctx, char )
return ret;
}
+bool Scanner::isRegexOption(const char *opt)
+{
+ size_t len = strlen(opt);
+ for (size_t i = 0; i < len; i++) {
+ char ch = opt[i];
+ switch (ch) {
+ case 'a': case 'c': case 'd': case 'e':
+ case 'g': case 'i': case 'm': case 'l':
+ case 'o': case 'p': case 'r': case 's':
+ case 'u': case 'x':
+ break;
+ default:
+ return false;
+ break;
+ }
+ }
+ return true;
+}
+
+bool Scanner::isRegexOptionPrevToken(LexContext *ctx)
+{
+ if (ctx->tmgr->size() < 2) return false;
+ Token *before_prev_token = ctx->tmgr->beforeLastToken();
+ Token *prev_token = ctx->tmgr->lastToken();
+ const char *data = prev_token->_data;
+ if (before_prev_token->info.type == TokenType::RegDelim &&
+ isalpha(data[0]) &&
+ string(data) != "or" &&
+ isRegexOption(data)) {
+ return true;
+ }
+ return false;
+}
+
Token *Scanner::scanCurSymbol(LexContext *ctx, char symbol)
{
Token *ret = NULL;
@@ -340,7 +385,8 @@ Token *Scanner::scanCurSymbol(LexContext *ctx, char symbol)
string prev_data = (prev_tk) ? prev_tk->_data : "";
int idx = ctx->tmgr->size() - 2;
string prev_before = (idx >= 0) ? string(ctx->tmgr->beforeLastToken()->_data) : "";
- if ((prev_before != "sub" && isRegexDelim(prev_tk, symbol)) ||
+ if ((prev_before != "sub" && !isRegexOptionPrevToken(ctx) &&
+ isRegexDelim(prev_tk, symbol)) ||
(prev_data == "{" && symbol == '/')) {
if (!isRegexEndDelim(ctx)) {
regex_delim = getRegexDelim(ctx);
@@ -358,7 +404,7 @@ Token *Scanner::scanCurSymbol(LexContext *ctx, char symbol)
ret = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
ret->info = tmgr->getTokenInfo(TokenType::RegDelim);
ctx->clearBuffer();
- } else if (symbol == '@' || symbol == '$' || symbol == '%') {// || symbol == '&') {
+ } else if (symbol == '@' || symbol == '$' || symbol == '%') { //|| symbol == '&')
ctx->writeBuffer(symbol);
} else if (symbol == ';') {
ctx->writeBuffer(symbol);
@@ -429,6 +475,121 @@ Token *Scanner::scanDoubleCharacterOperator(LexContext *ctx, char symbol, char n
return ret;
}
+/* Scanner::scanPostDeref
+
+The postfix dereference is a bit odd because we have to treat a sigil
+a bit special.
+
+Scalars are simple:
+
+ $scalar->$*
+
+Arrays have a special case with the last index, and support single
+element access and slices:
+
+ $array->@*
+ $array->$#*
+ $array->@[0]
+ $array->@[0,1]
+
+Hashes support single element access and slices:
+
+ $hash->%*
+	$hash->%{key}
+	$hash->%{key,key2}
+
+Code supports argument lists:
+
+ $code->&*
+ $code->&( arg, arg2 )
+
+Typeglobs have "keys" into the symbol table
+
+ $gref->**
+ $gref->*{SCALAR}
+
+*/
+
+Token *Scanner::scanPostDeref(LexContext *ctx)
+{
+ Token *ret = NULL;
+ Token *sigil_tk = NULL;
+
+ if (!isPostDeref(ctx)) return ret;
+
+ char symbol = ctx->smgr->currentChar();
+ ctx->writeBuffer(symbol);
+
+ if (symbol == '$') {
+ char next_ch = ctx->smgr->nextChar();
+ if (next_ch=='#') { // we have the last array index
+ symbol = ctx->smgr->forward(1);
+ ctx->writeBuffer(next_ch);
+ }
+ }
+
+ sigil_tk = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
+ sigil_tk->info = ctx->tmgr->getTokenInfo(TokenType::PostDeref);
+ ctx->clearBuffer();
+
+ // This is a bit odd because we add a Token directly instead of
+ // returning it and letting the rest of the system figure it out
+ ctx->tmgr->add(sigil_tk);
+
+ // We only care if it's a *. We'll let the rest of the tokenizer
+ // handle the slices, which would have [, {, (
+ char next_ch = ctx->smgr->nextChar();
+ if (next_ch != '*') return ret;
+
+ symbol = ctx->smgr->forward(1);
+ ctx->writeBuffer(symbol);
+ ret = ctx->tmgr->new_Token(ctx->buffer(), ctx->finfo);
+ ctx->clearBuffer();
+ ret->info = ctx->tmgr->getTokenInfo(TokenType::PostDerefStar);
+
+ return ret;
+}
+
+/* Scanner::isPostDeref
+
+See Scanner::scanPostDeref for the rules
+
+*/
+
+bool Scanner::isPostDeref(LexContext *ctx)
+{
+ Token *prev_token = ctx->tmgr->lastToken();
+ string prev_data = (prev_token) ? string(prev_token->_data) : "";
+ char symbol = ctx->smgr->currentChar();
+
+ // Should I check that the previous Token was Pointer
+ // instead of looking at the data
+ if (prev_data != "->") return false;
+
+ // do we need an isSigil method?
+ if (symbol != '$' && symbol != '@' && symbol != '%' && symbol != '&' && symbol != '*')
+ return false;
+
+ char next_ch = ctx->smgr->nextChar();
+
+ // scalar and array index case
+ if (symbol == '$' && ! ( next_ch == '*' || next_ch == '#' )) return false;
+
+ // array case
+ if (symbol == '@' && ! ( next_ch == '*' || next_ch == '[' )) return false;
+
+ // hash case
+ if (symbol == '%' && ! ( next_ch == '*' || next_ch == '{' )) return false;
+
+ // code case
+ if (symbol == '&' && ! ( next_ch == '*' || next_ch == '(' )) return false;
+
+ // typeglob case
+ if (symbol == '*' && ! ( next_ch == '*' || next_ch == '{' )) return false;
+
+ return true;
+}
+
Token *Scanner::scanSymbol(LexContext *ctx)
{
Token *ret = NULL;
@@ -437,8 +598,10 @@ Token *Scanner::scanSymbol(LexContext *ctx)
char next_ch = smgr->nextChar();
char after_next_ch = smgr->afterNextChar();
if (ctx->existsBuffer()) ctx->tmgr->add(scanPrevSymbol(ctx, symbol));
+
if (!isRegexStarted) {
- ret = scanTripleCharacterOperator(ctx, symbol, next_ch, after_next_ch);
+ ret = scanPostDeref(ctx);
+ if (!ret) ret = scanTripleCharacterOperator(ctx, symbol, next_ch, after_next_ch);
if (!ret) ret = scanDoubleCharacterOperator(ctx, symbol, next_ch);
}
if (!ret) ret = scanCurSymbol(ctx, symbol);
@@ -456,11 +619,16 @@ Token *Scanner::scanWordDelimiter(LexContext *ctx)
/* Key is HereDocument */
here_document_tag = string(token);
here_document_tag_tk = ret;
- ret->info = tmgr->getTokenInfo(TokenType::HereDocumentRawTag);
+ ret->info = tmgr->getTokenInfo(TokenType::HereDocumentBareTag);
} else if (string(token) == "format") {
- isFormatDeclared = true;
ret = ctx->tmgr->new_Token(token, ctx->finfo);
- ret->info = tmgr->getTokenInfo(TokenType::FormatDecl);
+
+		// If `format` was already declared (i.e. we are already in format context),
+		// this token must not be another FormatDecl. Check that here.
+ if (formatDeclaredToken == NULL) { // when it has not been in format context
+ ret->info = tmgr->getTokenInfo(TokenType::FormatDecl);
+ formatDeclaredToken = ret;
+ }
} else if (token[0] != '\n' || token[1] != EOL) {
ret = ctx->tmgr->new_Token(token, ctx->finfo);
}
@@ -517,10 +685,24 @@ Token *Scanner::scanLineDelimiter(LexContext *ctx)
Token *last_tk = ctx->tmgr->lastToken();
string data = (ret) ? string(ret->_data) :
(last_tk) ? string(last_tk->_data) : "";
- if (isFormatDeclared && data == "=") {
- isFormatDeclared = false;
- isFormatStarted = true;
- skipFlag = true;
+ if (formatDeclaredToken != NULL && data == "=") {
+ TokenManager *tmgr = ctx->tmgr;
+ Token *currentToken = tmgr->lastToken();
+ Token *prev_token = tmgr->previousToken(currentToken);
+ Token *before_prev_token = tmgr->beforePreviousToken(currentToken);
+ if (
+ (prev_token != NULL && prev_token->info.type != Enum::Token::Type::FormatDecl) &&
+ (before_prev_token != NULL && before_prev_token->info.type != Enum::Token::Type::FormatDecl)
+ ) {
+			// Reaching here means the previously declared `FormatDecl` is likely invalid,
+			// so downgrade the doubtful token to `Undefined` and do not treat this as format context.
+ formatDeclaredToken->info.type = Enum::Token::Type::Undefined;
+ } else {
+ // format context.
+ isFormatStarted = true;
+ skipFlag = true;
+ }
+ formatDeclaredToken = NULL;
} else if (here_document_tag != "") {
hereDocumentFlag = true;
skipFlag = true;
@@ -556,10 +738,10 @@ Token *Scanner::scanVersionString(LexContext *ctx)
TokenManager *tmgr = ctx->tmgr;
char *src = ctx->smgr->raw_script;
size_t i = ctx->smgr->idx;
- char *begin = src + i;
+ // char *begin = src + i;
char c = next(ctx, src, i);//NEXT();
Token *token = NULL;
- for (;(is_number(c) || c == '.') && c != EOL; c = next(ctx, src, i)) {}
+ for (;(is_number(c) || c == '.' || c == '_') && c != EOL; c = next(ctx, src, i)) {}
i -= 1;
char *buf = ctx->buffer();
buf[ctx->buffer_idx-1] = EOL;
@@ -575,7 +757,7 @@ Token *Scanner::scanNumber(LexContext *ctx)
TokenManager *tmgr = ctx->tmgr;
char *src = ctx->smgr->raw_script;
size_t i = ctx->smgr->idx;
- char *begin = src + i;
+ // char *begin = src + i;
int c = next(ctx, src, i);
Token *token = NULL;
assert((c == '.' || is_number(c)) && "It do not seem as Number");
@@ -452,6 +452,7 @@ TokenInfo decl_tokens[] = {
{Enum::Token::Type::HereDocumentTag, Enum::Token::Kind::Term, "HereDocumentTag", ""},
{Enum::Token::Type::HereDocumentRawTag, Enum::Token::Kind::Term, "HereDocumentRawTag", ""},
{Enum::Token::Type::HereDocumentExecTag, Enum::Token::Kind::Term, "HereDocumentExecTag", ""},
+ {Enum::Token::Type::HereDocumentBareTag, Enum::Token::Kind::Term, "HereDocumentBareTag", ""},
{Enum::Token::Type::RawHereDocument, Enum::Token::Kind::Term, "RawHereDocument", ""},
{Enum::Token::Type::HereDocument, Enum::Token::Kind::Term, "HereDocument", ""},
{Enum::Token::Type::HereDocumentEnd, Enum::Token::Kind::Term, "HereDocumentEnd", ""},
@@ -483,7 +484,15 @@ TokenInfo decl_tokens[] = {
{Enum::Token::Type::Pod, Enum::Token::Kind::Verbose, "Pod", ""},
{Enum::Token::Type::Comment, Enum::Token::Kind::Verbose, "Comment", ""},
{Enum::Token::Type::WhiteSpace, Enum::Token::Kind::Verbose, "WhiteSpace", ""},
- {Enum::Token::Type::Undefined, Enum::Token::Kind::Undefined, "Undefined", ""}
+ {Enum::Token::Type::Undefined, Enum::Token::Kind::Undefined, "Undefined", ""},
+ {Enum::Token::Type::PostDeref, Enum::Token::Kind::Symbol, "PostDeref", ""},
+ {Enum::Token::Type::PostDerefStar, Enum::Token::Kind::Symbol, "PostDerefStar", ""},
+ {Enum::Token::Type::PostDerefArraySliceOpenBracket, Enum::Token::Kind::Symbol, "PostDerefArraySliceOpenBracket", ""},
+ {Enum::Token::Type::PostDerefArraySliceCloseBracket, Enum::Token::Kind::Symbol, "PostDerefArraySliceCloseBracket", ""},
+ {Enum::Token::Type::PostDerefHashSliceOpenBrace, Enum::Token::Kind::Symbol, "PostDerefHashSliceOpenBrace", ""},
+ {Enum::Token::Type::PostDerefHashSliceCloseBrace, Enum::Token::Kind::Symbol, "PostDerefHashSliceCloseBrace", ""},
+ {Enum::Token::Type::PostDerefCodeOpenParen, Enum::Token::Kind::Symbol, "PostDerefCodeOpenParen", ""},
+ {Enum::Token::Type::PostDerefCodeCloseParen, Enum::Token::Kind::Symbol, "PostDerefCodeCloseParen", ""}
};
TokenInfo type_to_info[] = {
@@ -666,6 +675,7 @@ TokenInfo type_to_info[] = {
{Enum::Token::Type::HereDocumentTag, Enum::Token::Kind::Term, "HereDocumentTag", ""},
{Enum::Token::Type::HereDocumentRawTag, Enum::Token::Kind::Term, "HereDocumentRawTag", ""},
{Enum::Token::Type::HereDocumentExecTag, Enum::Token::Kind::Term, "HereDocumentExecTag", ""},
+ {Enum::Token::Type::HereDocumentBareTag, Enum::Token::Kind::Term, "HereDocumentBareTag", ""},
{Enum::Token::Type::RawHereDocument, Enum::Token::Kind::Term, "RawHereDocument", ""},
{Enum::Token::Type::HereDocument, Enum::Token::Kind::Term, "HereDocument", ""},
{Enum::Token::Type::HereDocumentEnd, Enum::Token::Kind::Term, "HereDocumentEnd", ""},
@@ -697,6 +707,14 @@ TokenInfo type_to_info[] = {
{Enum::Token::Type::Pod, Enum::Token::Kind::Verbose, "Pod", ""},
{Enum::Token::Type::Comment, Enum::Token::Kind::Verbose, "Comment", ""},
{Enum::Token::Type::WhiteSpace, Enum::Token::Kind::Verbose, "WhiteSpace", ""},
- {Enum::Token::Type::Undefined, Enum::Token::Kind::Undefined, "Undefined", ""}
+ {Enum::Token::Type::Undefined, Enum::Token::Kind::Undefined, "Undefined", ""},
+ {Enum::Token::Type::PostDeref, Enum::Token::Kind::Symbol, "PostDeref", ""},
+ {Enum::Token::Type::PostDerefStar, Enum::Token::Kind::Symbol, "PostDerefStar", ""},
+ {Enum::Token::Type::PostDerefArraySliceOpenBracket, Enum::Token::Kind::Symbol, "PostDerefArraySliceOpenBracket", ""},
+ {Enum::Token::Type::PostDerefArraySliceCloseBracket, Enum::Token::Kind::Symbol, "PostDerefArraySliceCloseBracket", ""},
+ {Enum::Token::Type::PostDerefHashSliceOpenBrace, Enum::Token::Kind::Symbol, "PostDerefHashSliceOpenBrace", ""},
+ {Enum::Token::Type::PostDerefHashSliceCloseBrace, Enum::Token::Kind::Symbol, "PostDerefHashSliceCloseBrace", ""},
+ {Enum::Token::Type::PostDerefCodeOpenParen, Enum::Token::Kind::Symbol, "PostDerefCodeOpenParen", ""},
+ {Enum::Token::Type::PostDerefCodeCloseParen, Enum::Token::Kind::Symbol, "PostDerefCodeCloseParen", ""}
};
@@ -4,7 +4,8 @@ use Data::Dumper;
use Test::More;
BEGIN { use_ok('Compiler::Lexer') };
-my $tokens = Compiler::Lexer->new('')->tokenize(<<'SCRIPT');
+subtest 'tokenize' => sub {
+ my $tokens = Compiler::Lexer->new('')->tokenize(<<'SCRIPT');
format STDOUT =
ok @<<<<<<<
$test
@@ -12,7 +13,6 @@ $test
my $hoge;
SCRIPT
-subtest 'tokenize' => sub {
is_deeply($tokens, [
bless( {
'kind' => Compiler::Lexer::Kind::T_Decl,
@@ -91,4 +91,250 @@ $test
]);
};
+subtest 'omitted handler name' => sub {
+ my $tokens = Compiler::Lexer->new('')->tokenize(<<'SCRIPT');
+format =
+ok @<<<<<<<
+$test
+.
+my $hoge;
+SCRIPT
+
+ is_deeply($tokens, [
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Decl,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'FormatDecl',
+ 'data' => 'format',
+ 'type' => Compiler::Lexer::TokenType::T_FormatDecl,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Assign,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Assign',
+ 'data' => '=',
+ 'type' => Compiler::Lexer::TokenType::T_Assign,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Format',
+ 'data' => 'ok @<<<<<<<
+$test
+',
+ 'type' => Compiler::Lexer::TokenType::T_Format,
+ 'line' => 4
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'FormatEnd',
+ 'data' => '.',
+ 'type' => Compiler::Lexer::TokenType::T_FormatEnd,
+ 'line' => 4
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Decl,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'VarDecl',
+ 'data' => 'my',
+ 'type' => Compiler::Lexer::TokenType::T_VarDecl,
+ 'line' => 4
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'LocalVar',
+ 'data' => '$hoge',
+ 'type' => Compiler::Lexer::TokenType::T_LocalVar,
+ 'line' => 4
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_StmtEnd,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'SemiColon',
+ 'data' => ';',
+ 'type' => Compiler::Lexer::TokenType::T_SemiColon,
+ 'line' => 4
+ }, 'Compiler::Lexer::Token' )
+ ]);
+};
+
+subtest 'do not misrecognize when confusing case' => sub {
+ my $tokens = Compiler::Lexer->new('')->tokenize(<<'SCRIPT');
+my $foo = {
+ format => 1,
+};
+
+my $bar =
+ "asdf";
+1;
+SCRIPT
+
+ is_deeply($tokens, [
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Decl,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'VarDecl',
+ 'data' => 'my',
+ 'type' => Compiler::Lexer::TokenType::T_VarDecl,
+ 'line' => 1,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'LocalVar',
+ 'data' => '$foo',
+ 'type' => Compiler::Lexer::TokenType::T_LocalVar,
+ 'line' => 1,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Assign,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Assign',
+ 'data' => '=',
+ 'type' => Compiler::Lexer::TokenType::T_Assign,
+ 'line' => 1,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Symbol,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'LeftBrace',
+ 'data' => '{',
+ 'type' => Compiler::Lexer::TokenType::T_LeftBrace,
+ 'line' => 1,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Key',
+ 'data' => 'format',
+ 'type' => Compiler::Lexer::TokenType::T_Key,
+ 'line' => 2,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Operator,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Arrow',
+ 'data' => '=>',
+ 'type' => Compiler::Lexer::TokenType::T_Arrow,
+ 'line' => 2,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Int',
+ 'data' => '1',
+ 'type' => Compiler::Lexer::TokenType::T_Int,
+ 'line' => 2,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Comma,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Comma',
+ 'data' => ',',
+ 'type' => Compiler::Lexer::TokenType::T_Comma,
+ 'line' => 2,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Symbol,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'RightBrace',
+ 'data' => '}',
+ 'type' => Compiler::Lexer::TokenType::T_RightBrace,
+ 'line' => 3,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_StmtEnd,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'SemiColon',
+ 'data' => ';',
+ 'type' => Compiler::Lexer::TokenType::T_SemiColon,
+ 'line' => 3,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Decl,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'VarDecl',
+ 'data' => 'my',
+ 'type' => Compiler::Lexer::TokenType::T_VarDecl,
+ 'line' => 5,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'LocalVar',
+ 'data' => '$bar',
+ 'type' => Compiler::Lexer::TokenType::T_LocalVar,
+ 'line' => 5,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Assign,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Assign',
+ 'data' => '=',
+ 'type' => Compiler::Lexer::TokenType::T_Assign,
+ 'line' => 5,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'String',
+ 'data' => 'asdf',
+ 'type' => Compiler::Lexer::TokenType::T_String,
+ 'line' => 6,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_StmtEnd,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'SemiColon',
+ 'data' => ';',
+ 'type' => Compiler::Lexer::TokenType::T_SemiColon,
+ 'line' => 6,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Int',
+ 'data' => '1',
+ 'type' => Compiler::Lexer::TokenType::T_Int,
+ 'line' => 7,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_StmtEnd,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'SemiColon',
+ 'data' => ';',
+ 'type' => Compiler::Lexer::TokenType::T_SemiColon,
+ 'line' => 7,
+ }, 'Compiler::Lexer::Token' )
+ ]);
+};
+
done_testing;
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+my $tokens = Compiler::Lexer->new->tokenize('$foo x= 3');
+is_deeply($tokens, [
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'GlobalVar',
+ 'data' => '$foo',
+ 'type' => Compiler::Lexer::TokenType::T_GlobalVar,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Assign,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'StringMulEqual',
+ 'data' => 'x=',
+ 'type' => Compiler::Lexer::TokenType::T_StringMulEqual,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'Int',
+ 'data' => '3',
+ 'type' => Compiler::Lexer::TokenType::T_Int,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' )
+]);
+
+done_testing;
@@ -0,0 +1,20 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+my $tokens = Compiler::Lexer->new->tokenize(<<'...');
+/foo/m;
+/bar/;
+...
+
+my %delim = map {
+ ($_->data => 1)
+} grep {
+ $_->type == Compiler::Lexer::TokenType::T_RegDelim
+} @$tokens;
+
+
+is_deeply([keys %delim], ['/']);
+
+done_testing;
@@ -0,0 +1,46 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+my $tokens = Compiler::Lexer->new->tokenize('not /\d/');
+is_deeply($tokens, [
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_SingleTerm,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'AlphabetNot',
+ 'data' => 'not',
+ 'type' => Compiler::Lexer::TokenType::T_AlphabetNot,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'RegDelim',
+ 'data' => '/',
+ 'type' => Compiler::Lexer::TokenType::T_RegDelim,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'RegExp',
+ 'data' => '\\d',
+ 'type' => Compiler::Lexer::TokenType::T_RegExp,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'name' => 'RegDelim',
+ 'data' => '/',
+ 'type' => Compiler::Lexer::TokenType::T_RegDelim,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' )
+]);
+
+done_testing;
@@ -0,0 +1,9 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+Compiler::Lexer->new->tokenize('^/');
+
+ok 1;
+done_testing;
@@ -0,0 +1,37 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+my $tokens = Compiler::Lexer->new->tokenize('$foo-1');
+is_deeply($tokens, [
+ bless( {
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'name' => 'GlobalVar',
+ 'type' => Compiler::Lexer::TokenType::T_GlobalVar,
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'data' => '$foo'
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Operator,
+ 'name' => 'Sub',
+ 'type' => Compiler::Lexer::TokenType::T_Sub,
+ 'stype' => 0,
+ 'has_warnings' => 0,
+ 'data' => '-'
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'has_warnings' => 0,
+ 'stype' => 0,
+ 'data' => '1',
+ 'line' => 1,
+ 'type' => Compiler::Lexer::TokenType::T_Int,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'name' => 'Int'
+ }, 'Compiler::Lexer::Token' )
+]);
+
+done_testing;
@@ -0,0 +1,73 @@
+use strict;
+use warnings;
+use Compiler::Lexer;
+use Test::More;
+
+my $tokens = Compiler::Lexer->new->tokenize('s///;');
+is_deeply($tokens, [
+ bless( {
+ 'type' => Compiler::Lexer::TokenType::T_RegReplace,
+ 'name' => 'RegReplace',
+ 'stype' => 0,
+ 'data' => 's',
+ 'has_warnings' => 0,
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_RegReplacePrefix,
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'stype' => 0,
+ 'data' => '/',
+ 'type' => Compiler::Lexer::TokenType::T_RegDelim,
+ 'name' => 'RegDelim',
+ 'has_warnings' => 0,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'line' => 1
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'has_warnings' => 0,
+ 'name' => 'RegReplaceFrom',
+ 'type' => Compiler::Lexer::TokenType::T_RegReplaceFrom,
+ 'data' => '',
+ 'stype' => 0,
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Term
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'type' => Compiler::Lexer::TokenType::T_RegMiddleDelim,
+ 'name' => 'RegMiddleDelim',
+ 'data' => '/',
+ 'stype' => 0,
+ 'has_warnings' => 0
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'has_warnings' => 0,
+ 'type' => Compiler::Lexer::TokenType::T_RegReplaceTo,
+ 'name' => 'RegReplaceTo',
+ 'stype' => 0,
+ 'data' => ''
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_Term,
+ 'name' => 'RegDelim',
+ 'type' => Compiler::Lexer::TokenType::T_RegDelim,
+ 'stype' => 0,
+ 'data' => '/',
+ 'has_warnings' => 0
+ }, 'Compiler::Lexer::Token' ),
+ bless( {
+ 'has_warnings' => 0,
+ 'name' => 'SemiColon',
+ 'type' => Compiler::Lexer::TokenType::T_SemiColon,
+ 'stype' => 0,
+ 'data' => ';',
+ 'line' => 1,
+ 'kind' => Compiler::Lexer::Kind::T_StmtEnd
+ }, 'Compiler::Lexer::Token' )
+]);
+
+done_testing;