Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Tokenizer.pm |
Statements | Executed 77 statements in 5.34ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 1.69ms | 1.85ms | BEGIN@34 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 1.20ms | 1.39ms | BEGIN@33 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 618µs | 747µs | BEGIN@23 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 600µs | 925µs | BEGIN@30 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 598µs | 728µs | BEGIN@19 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 595µs | 1.55ms | BEGIN@15 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 471µs | 690µs | BEGIN@29 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 440µs | 589µs | BEGIN@20 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 377µs | 543µs | BEGIN@28 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 362µs | 519µs | BEGIN@22 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 358µs | 524µs | BEGIN@25 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 327µs | 504µs | BEGIN@36 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 317µs | 451µs | BEGIN@14 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 301µs | 722µs | BEGIN@17 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 285µs | 1.16ms | BEGIN@26 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 282µs | 414µs | BEGIN@32 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 266µs | 375µs | BEGIN@37 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 248µs | 386µs | BEGIN@40 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 246µs | 376µs | BEGIN@41 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 237µs | 398µs | BEGIN@18 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 234µs | 334µs | BEGIN@16 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 217µs | 354µs | BEGIN@31 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 206µs | 4.54ms | BEGIN@24 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 206µs | 338µs | BEGIN@27 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 201µs | 297µs | BEGIN@21 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 12µs | 23µs | BEGIN@3 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 8µs | 28µs | BEGIN@42 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 8µs | 32µs | BEGIN@9 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 8µs | 30µs | BEGIN@8 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 7µs | 27µs | BEGIN@43 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 7µs | 12µs | BEGIN@4 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 7µs | 53µs | BEGIN@6 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 5µs | 5µs | BEGIN@35 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 4µs | 4µs | BEGIN@38 | PPIx::Regexp::Tokenizer::
1 | 1 | 1 | 4µs | 4µs | BEGIN@39 | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__finish | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__init | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__regexp | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKEN_FALLBACK__regexp | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKEN_FALLBACK__repl | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | __effective_modifiers | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | _known_tokenizer_check | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | _known_tokenizers | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | _remainder | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | capture | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | content | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | cookie | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | default_modifiers | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | encoding | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | errstr | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | expect | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | failures | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_matching_delimiter | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | find_regexp | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_start_delimiter | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | get_token | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | interpolates | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | make_token | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | match | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | modifier | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | modifier_duplicate | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | modifier_modify | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | modifier_pop | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | new | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | next_token | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | peek | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | ppi_document | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | prior | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | significant | PPIx::Regexp::Tokenizer::
0 | 0 | 0 | 0s | 0s | tokens | PPIx::Regexp::Tokenizer::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package PPIx::Regexp::Tokenizer; | ||||
2 | |||||
3 | 2 | 19µs | 2 | 35µs | # spent 23µs (12+12) within PPIx::Regexp::Tokenizer::BEGIN@3 which was called:
# once (12µs+12µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 3 # spent 23µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@3
# spent 12µs making 1 call to strict::import |
4 | 2 | 20µs | 2 | 16µs | # spent 12µs (7+4) within PPIx::Regexp::Tokenizer::BEGIN@4 which was called:
# once (7µs+4µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 4 # spent 12µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@4
# spent 4µs making 1 call to warnings::import |
5 | |||||
6 | 2 | 26µs | 2 | 99µs | # spent 53µs (7+46) within PPIx::Regexp::Tokenizer::BEGIN@6 which was called:
# once (7µs+46µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 6 # spent 53µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@6
# spent 46µs making 1 call to base::import |
7 | |||||
8 | 2 | 27µs | 2 | 52µs | # spent 30µs (8+22) within PPIx::Regexp::Tokenizer::BEGIN@8 which was called:
# once (8µs+22µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 8 # spent 30µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@8
# spent 22µs making 1 call to Exporter::import |
9 | 1 | 300ns | # spent 32µs (8+24) within PPIx::Regexp::Tokenizer::BEGIN@9 which was called:
# once (8µs+24µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 13 | ||
10 | MINIMUM_PERL | ||||
11 | TOKEN_LITERAL | ||||
12 | TOKEN_UNKNOWN | ||||
13 | 1 | 19µs | 2 | 56µs | }; # spent 32µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@9
# spent 24µs making 1 call to Exporter::import |
14 | 2 | 91µs | 1 | 451µs | # spent 451µs (317+134) within PPIx::Regexp::Tokenizer::BEGIN@14 which was called:
# once (317µs+134µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 14 # spent 451µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@14 |
15 | 2 | 90µs | 1 | 1.55ms | # spent 1.55ms (595µs+955µs) within PPIx::Regexp::Tokenizer::BEGIN@15 which was called:
# once (595µs+955µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 15 # spent 1.55ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@15 |
16 | 2 | 135µs | 1 | 334µs | # spent 334µs (234+100) within PPIx::Regexp::Tokenizer::BEGIN@16 which was called:
# once (234µs+100µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 16 # spent 334µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@16 |
17 | 2 | 102µs | 1 | 722µs | # spent 722µs (301+421) within PPIx::Regexp::Tokenizer::BEGIN@17 which was called:
# once (301µs+421µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 17 # spent 722µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@17 |
18 | 2 | 95µs | 1 | 398µs | # spent 398µs (237+161) within PPIx::Regexp::Tokenizer::BEGIN@18 which was called:
# once (237µs+161µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 18 # spent 398µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@18 |
19 | 2 | 99µs | 1 | 728µs | # spent 728µs (598+131) within PPIx::Regexp::Tokenizer::BEGIN@19 which was called:
# once (598µs+131µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 19 # spent 728µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@19 |
20 | 2 | 92µs | 1 | 589µs | # spent 589µs (440+150) within PPIx::Regexp::Tokenizer::BEGIN@20 which was called:
# once (440µs+150µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 20 # spent 589µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@20 |
21 | 2 | 89µs | 1 | 297µs | # spent 297µs (201+97) within PPIx::Regexp::Tokenizer::BEGIN@21 which was called:
# once (201µs+97µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 21 # spent 297µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@21 |
22 | 2 | 85µs | 1 | 519µs | # spent 519µs (362+157) within PPIx::Regexp::Tokenizer::BEGIN@22 which was called:
# once (362µs+157µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 22 # spent 519µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@22 |
23 | 2 | 113µs | 1 | 747µs | # spent 747µs (618+130) within PPIx::Regexp::Tokenizer::BEGIN@23 which was called:
# once (618µs+130µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 23 # spent 747µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@23 |
24 | 2 | 144µs | 1 | 4.54ms | # spent 4.54ms (206µs+4.34) within PPIx::Regexp::Tokenizer::BEGIN@24 which was called:
# once (206µs+4.34ms) by PPIx::Regexp::Lexer::BEGIN@61 at line 24 # spent 4.54ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@24 |
25 | 2 | 123µs | 1 | 524µs | # spent 524µs (358+166) within PPIx::Regexp::Tokenizer::BEGIN@25 which was called:
# once (358µs+166µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 25 # spent 524µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@25 |
26 | 2 | 128µs | 1 | 1.16ms | # spent 1.16ms (285µs+873µs) within PPIx::Regexp::Tokenizer::BEGIN@26 which was called:
# once (285µs+873µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 26 # spent 1.16ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@26 |
27 | 2 | 120µs | 1 | 338µs | # spent 338µs (206+133) within PPIx::Regexp::Tokenizer::BEGIN@27 which was called:
# once (206µs+133µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 27 # spent 338µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@27 |
28 | 2 | 124µs | 1 | 543µs | # spent 543µs (377+166) within PPIx::Regexp::Tokenizer::BEGIN@28 which was called:
# once (377µs+166µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 28 # spent 543µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@28 |
29 | 2 | 128µs | 1 | 690µs | # spent 690µs (471+220) within PPIx::Regexp::Tokenizer::BEGIN@29 which was called:
# once (471µs+220µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 29 # spent 690µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@29 |
30 | 2 | 123µs | 1 | 925µs | # spent 925µs (600+325) within PPIx::Regexp::Tokenizer::BEGIN@30 which was called:
# once (600µs+325µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 30 # spent 925µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@30 |
31 | 2 | 123µs | 1 | 354µs | # spent 354µs (217+136) within PPIx::Regexp::Tokenizer::BEGIN@31 which was called:
# once (217µs+136µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 31 # spent 354µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@31 |
32 | 2 | 137µs | 1 | 414µs | # spent 414µs (282+133) within PPIx::Regexp::Tokenizer::BEGIN@32 which was called:
# once (282µs+133µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 32 # spent 414µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@32 |
33 | 2 | 124µs | 1 | 1.39ms | # spent 1.39ms (1.20+185µs) within PPIx::Regexp::Tokenizer::BEGIN@33 which was called:
# once (1.20ms+185µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 33 # spent 1.39ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@33 |
34 | 2 | 109µs | 1 | 1.85ms | # spent 1.85ms (1.69+163µs) within PPIx::Regexp::Tokenizer::BEGIN@34 which was called:
# once (1.69ms+163µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 34 # spent 1.85ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@34 |
35 | 2 | 21µs | 1 | 5µs | # spent 5µs within PPIx::Regexp::Tokenizer::BEGIN@35 which was called:
# once (5µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 35 # spent 5µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@35 |
36 | 2 | 103µs | 1 | 504µs | # spent 504µs (327+177) within PPIx::Regexp::Tokenizer::BEGIN@36 which was called:
# once (327µs+177µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 36 # spent 504µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@36 |
37 | 2 | 97µs | 1 | 375µs | # spent 375µs (266+109) within PPIx::Regexp::Tokenizer::BEGIN@37 which was called:
# once (266µs+109µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 37 # spent 375µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@37 |
38 | 2 | 16µs | 1 | 4µs | # spent 4µs within PPIx::Regexp::Tokenizer::BEGIN@38 which was called:
# once (4µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 38 # spent 4µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@38 |
39 | 2 | 17µs | 1 | 4µs | # spent 4µs within PPIx::Regexp::Tokenizer::BEGIN@39 which was called:
# once (4µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 39 # spent 4µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@39 |
40 | 2 | 90µs | 1 | 386µs | # spent 386µs (248+139) within PPIx::Regexp::Tokenizer::BEGIN@40 which was called:
# once (248µs+139µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 40 # spent 386µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@40 |
41 | 2 | 89µs | 1 | 376µs | # spent 376µs (246+130) within PPIx::Regexp::Tokenizer::BEGIN@41 which was called:
# once (246µs+130µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 41 # spent 376µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@41 |
42 | 2 | 21µs | 2 | 49µs | # spent 28µs (8+21) within PPIx::Regexp::Tokenizer::BEGIN@42 which was called:
# once (8µs+21µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 42 # spent 28µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@42
# spent 21µs making 1 call to Exporter::import |
43 | 2 | 2.40ms | 2 | 47µs | # spent 27µs (7+20) within PPIx::Regexp::Tokenizer::BEGIN@43 which was called:
# once (7µs+20µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 43 # spent 27µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@43
# spent 20µs making 1 call to Exporter::import |
44 | |||||
45 | 1 | 700ns | our $VERSION = '0.036'; | ||
46 | |||||
47 | { | ||||
48 | # Names of classes containing tokenization machinery. There are few | ||||
49 | # known ordering requirements, since each class recognizes its own, | ||||
50 | # and I have tried to prevent overlap. Absent such constraints, the | ||||
51 | # order is in perceived frequency of acceptance, to keep the search | ||||
52 | # as short as possible. If I were conscientious I would gather | ||||
53 | # statistics on this. | ||||
# The list is consulted by _known_tokenizers(), which keeps only those
# classes that implement the handler for the tokenizer's current mode.
54 | 2 | 3µs | my @classes = ( # TODO make readonly when acceptable way appears | ||
55 | 'PPIx::Regexp::Token::Literal', | ||||
56 | 'PPIx::Regexp::Token::Interpolation', | ||||
57 | 'PPIx::Regexp::Token::Control', # Note 1 | ||||
58 | 'PPIx::Regexp::Token::CharClass::Simple', # Note 2 | ||||
59 | 'PPIx::Regexp::Token::Quantifier', | ||||
60 | 'PPIx::Regexp::Token::Greediness', | ||||
61 | 'PPIx::Regexp::Token::CharClass::POSIX', # Note 3 | ||||
62 | 'PPIx::Regexp::Token::Structure', | ||||
63 | 'PPIx::Regexp::Token::Assertion', | ||||
64 | 'PPIx::Regexp::Token::Backreference', | ||||
65 | 'PPIx::Regexp::Token::Operator', # Note 4 | ||||
66 | ); | ||||
67 | |||||
68 | # Note 1: If we are in quote mode ( \Q ... \E ), Control makes a | ||||
69 | # literal out of anything it sees other than \E. So it | ||||
70 | # needs to come before almost all other tokenizers. Not | ||||
71 | # Literal, which already makes literals, and not | ||||
72 | # Interpolation, which is legal in quote mode, but | ||||
73 | # everything else. | ||||
74 | |||||
75 | # Note 2: CharClass::Simple must come after Literal, because it | ||||
76 | # relies on Literal to recognize a Unicode named character | ||||
77 | # ( \N{something} ), so any \N that comes through to it | ||||
78 | # must be the \N simple character class (which represents | ||||
79 | # anything but a newline, and was introduced in Perl | ||||
80 | # 5.11.0). | ||||
81 | |||||
82 | # Note 3: CharClass::POSIX has to come before Structure, since both | ||||
83 | # look for square brackets, and CharClass::POSIX is the | ||||
84 | # more particular. | ||||
85 | |||||
86 | # Note 4: Operator relies on Literal making the characters literal | ||||
87 | # if they appear in a context where they can not be | ||||
88 | # operators, and Control making them literals if quoting, | ||||
89 | # so it must come after both. | ||||
90 | |||||
# Return the list of tokenizer class names to try in the current mode
# ( $self->{mode} ). Any "expected" classes come first, followed by the
# members of @classes that implement the handler for this mode. The
# per-mode filtering of @classes is cached in $self->{known}{$mode};
# the expected classes are re-checked on every call.
91 | sub _known_tokenizers { | ||||
92 | my ( $self ) = @_; | ||||
93 | |||||
94 | my $mode = $self->{mode}; | ||||
95 | |||||
96 | my @expect; | ||||
# Classes registered by expect() for the next cycle become the current
# expectation now.
97 | if ( $self->{expect_next} ) { | ||||
98 | $self->{expect} = $self->{expect_next}; | ||||
99 | $self->{expect_next} = undef; | ||||
100 | } | ||||
101 | if ( $self->{expect} ) { | ||||
102 | @expect = $self->_known_tokenizer_check( | ||||
103 | @{ $self->{expect} } ); | ||||
104 | } | ||||
105 | |||||
# Use the cached list for this mode if one has already been computed.
106 | exists $self->{known}{$mode} and return ( | ||||
107 | @expect, @{ $self->{known}{$mode} } ); | ||||
108 | |||||
109 | my @found = $self->_known_tokenizer_check( @classes ); | ||||
110 | |||||
111 | $self->{known}{$mode} = \@found; | ||||
112 | return (@expect, @found); | ||||
113 | } | ||||
114 | |||||
# Given a list of class names, return those for which can() finds a
# method named '__PPIX_TOKENIZER__' . $self->{mode}, in their original
# order.
115 | sub _known_tokenizer_check { | ||||
116 | my ( $self, @args ) = @_; | ||||
117 | |||||
118 | my $mode = $self->{mode}; | ||||
119 | |||||
120 | my $handler = '__PPIX_TOKENIZER__' . $mode; | ||||
121 | my @found; | ||||
122 | |||||
123 | foreach my $class ( @args ) { | ||||
124 | |||||
125 | $class->can( $handler ) or next; | ||||
126 | push @found, $class; | ||||
127 | |||||
128 | } | ||||
129 | |||||
130 | return @found; | ||||
131 | } | ||||
132 | |||||
133 | } | ||||
134 | |||||
135 | { | ||||
136 | 2 | 100ns | my $errstr; | ||
137 | |||||
# Constructor. Takes the thing to tokenize ( $re ) followed by name/value
# arguments; the ones read here are default_modifiers, encoding and
# trace.
# $re may be something for which __instance( $re, 'PPI::Element' ) is
# true, in which case its content() is tokenized, or a non-reference
# scalar, which is used as-is. Any other reference is rejected.
# Returns the new object. On failure returns nothing, after storing a
# message in the lexical $errstr that errstr() reports. $errstr is reset
# to undef at the start of every call.
138 | sub new { | ||||
139 | my ( $class, $re, %args ) = @_; | ||||
# Allow new() to be called on an existing object as well as on a class
# name.
140 | ref $class and $class = ref $class; | ||||
141 | |||||
142 | $errstr = undef; | ||||
143 | |||||
144 | exists $args{default_modifiers} | ||||
145 | and 'ARRAY' ne ref $args{default_modifiers} | ||||
146 | and do { | ||||
147 | $errstr = 'default_modifiers must be an array reference'; | ||||
148 | return; | ||||
149 | }; | ||||
150 | |||||
151 | my $self = { | ||||
152 | capture => undef, # Captures from find_regexp. | ||||
153 | content => undef, # The string we are tokenizing. | ||||
154 | cookie => {}, # Cookies | ||||
155 | cursor_curr => 0, # The current position in the string. | ||||
156 | cursor_limit => undef, # The end of the portion of the | ||||
157 | # string being tokenized. | ||||
158 | cursor_orig => undef, # Position of cursor when tokenizer | ||||
159 | # called. Used by get_token to prevent | ||||
160 | # recursion. | ||||
161 | cursor_modifiers => undef, # Position of modifiers. | ||||
162 | default_modifiers => $args{default_modifiers} || [], | ||||
163 | delimiter_finish => undef, # Finishing delimiter of regexp. | ||||
164 | delimiter_re => undef, # Recognize finishing delimiter. | ||||
165 | delimiter_start => undef, # Starting delimiter of regexp. | ||||
166 | encoding => $args{encoding}, # Character encoding. | ||||
167 | expect => undef, # Extra classes to expect. | ||||
168 | expect_next => undef, # Extra classes as of next parse cycle | ||||
169 | failures => 0, # Number of parse failures. | ||||
170 | find => undef, # String for find_regexp | ||||
171 | known => {}, # Known tokenizers, by mode. | ||||
172 | match => undef, # Match from find_regexp. | ||||
173 | mode => 'init', # Initialize | ||||
174 | modifiers => [{}], # Modifier hash. | ||||
175 | pending => [], # Tokens made but not returned. | ||||
176 | prior => TOKEN_UNKNOWN, # Prior significant token. | ||||
177 | source => $re, # The object we were initialized with. | ||||
# NOTE(review): _defined_or() is not defined in this part of the file;
# presumably it returns the first defined argument - confirm.
178 | trace => __PACKAGE__->_defined_or( | ||||
179 | $args{trace}, $ENV{PPIX_REGEXP_TOKENIZER_TRACE}, 0 ), | ||||
180 | }; | ||||
181 | |||||
182 | if ( __instance( $re, 'PPI::Element' ) ) { | ||||
183 | $self->{content} = $re->content(); | ||||
184 | } elsif ( ref $re ) { | ||||
185 | $errstr = ref( $re ) . ' not supported'; | ||||
186 | return; | ||||
187 | } else { | ||||
188 | $self->{content} = $re; | ||||
189 | } | ||||
190 | |||||
191 | bless $self, $class; | ||||
192 | |||||
# NOTE(review): decode() is not defined in this part of the file;
# presumably it applies the encoding argument - confirm.
193 | $self->{content} = $self->decode( $self->{content} ); | ||||
194 | |||||
# Trailing white space is excluded from the portion to be tokenized:
# the limit is the offset at which that white space starts.
195 | if ( $self->{content} =~ m/ \s+ \z /smx ) { | ||||
196 | $self->{cursor_limit} = $-[0]; | ||||
197 | } else { | ||||
198 | $self->{cursor_limit} = length $self->{content}; | ||||
199 | } | ||||
200 | |||||
201 | $self->{trace} | ||||
202 | and warn "\ntokenizing '$self->{content}'\n"; | ||||
203 | |||||
204 | return $self; | ||||
205 | } | ||||
206 | |||||
# Return the message recorded by the most recent call to new(), or undef
# if that call recorded none. The value is held in a lexical variable
# rather than in the object, so the invocant is ignored.
207 | sub errstr { | ||||
208 | return $errstr; | ||||
209 | } | ||||
210 | |||||
211 | } | ||||
212 | |||||
# Return the capture groups stored by the last successful find_regexp().
# In list context the groups are returned as a list; in scalar context
# the stored array reference itself is returned (not a copy). Returns
# nothing if no captures are stored, or if called in void context.
213 | sub capture { | ||||
214 | my ( $self ) = @_; | ||||
215 | $self->{capture} or return; | ||||
216 | defined wantarray or return; | ||||
217 | return wantarray ? @{ $self->{capture} } : $self->{capture}; | ||||
218 | } | ||||
219 | |||||
# Return the string being tokenized, as stored in the object by new().
220 | sub content { | ||||
221 | my ( $self ) = @_; | ||||
222 | return $self->{content}; | ||||
223 | } | ||||
224 | |||||
# Get, set or delete a named cookie.
#   cookie( $name )         returns the value stored under $name;
#   cookie( $name, $code )  stores the code reference and returns it;
#   cookie( $name, undef )  deletes the cookie, returning the old value.
# Calls confess() if $name is undefined, or if the second argument is
# defined but ref() does not report it as 'CODE'.
# make_token() calls every stored cookie with the tokenizer and the new
# token, and deletes any cookie that returns false.
225 | sub cookie { | ||||
226 | my ( $self, $name, @args ) = @_; | ||||
227 | defined $name | ||||
228 | or confess "Programming error - undefined cookie name"; | ||||
229 | @args or return $self->{cookie}{$name}; | ||||
230 | my $cookie = shift @args; | ||||
231 | if ( ref $cookie eq 'CODE' ) { | ||||
232 | return ( $self->{cookie}{$name} = $cookie ); | ||||
233 | } elsif ( defined $cookie ) { | ||||
234 | confess "Programming error - cookie must be CODE ref or undef"; | ||||
235 | } else { | ||||
236 | return delete $self->{cookie}{$name}; | ||||
237 | } | ||||
238 | } | ||||
239 | |||||
# Return a new array reference holding a shallow copy of the
# default_modifiers given to new() (empty if none were given).
240 | sub default_modifiers { | ||||
241 | my ( $self ) = @_; | ||||
242 | return [ @{ $self->{default_modifiers} } ]; | ||||
243 | } | ||||
244 | |||||
# Return a shallow copy of the hash in $self->{effective_modifiers}, or
# a new empty hash if that slot does not hold an unblessed hash
# reference. The slot is filled in by __PPIX_TOKENIZER__init.
245 | sub __effective_modifiers { | ||||
246 | my ( $self ) = @_; | ||||
247 | 'HASH' eq ref $self->{effective_modifiers} | ||||
248 | or return {}; | ||||
249 | return { %{ $self->{effective_modifiers} } }; | ||||
250 | } | ||||
251 | |||||
# Return the encoding argument given to new(), or undef if there was
# none.
252 | sub encoding { | ||||
253 | my ( $self ) = @_; | ||||
254 | return $self->{encoding}; | ||||
255 | } | ||||
256 | |||||
# Register extra token classes for _known_tokenizers() to return ahead
# of the regular ones, starting with its next call. Class names that do
# not begin with 'PPIx::Regexp::' have that prefix prepended. Any
# expectation currently in force is cleared. Does nothing when called
# with no class names. Returns nothing.
257 | sub expect { | ||||
258 | my ( $self, @args ) = @_; | ||||
259 | |||||
260 | @args | ||||
261 | or return; | ||||
262 | |||||
263 | $self->{expect_next} = [ | ||||
264 | map { m/ \A PPIx::Regexp:: /smx ? $_ : 'PPIx::Regexp::' . $_ } | ||||
265 | @args | ||||
266 | ]; | ||||
267 | $self->{expect} = undef; | ||||
268 | return; | ||||
269 | } | ||||
270 | |||||
# Return the failure count, which starts at 0 and is incremented by
# make_token() each time it makes a token that isa TOKEN_UNKNOWN.
271 | sub failures { | ||||
272 | my ( $self ) = @_; | ||||
273 | return $self->{failures}; | ||||
274 | } | ||||
275 | |||||
# Treat the character at the cursor as an opening delimiter and scan
# forward for the matching closing delimiter. The closing delimiter is
# whatever close_bracket() returns for the opening character or, if that
# is false, the opening character itself.
# Returns the offset of the closing delimiter relative to the cursor, or
# nothing if it is not found before cursor_limit.
# NOTE(review): close_bracket() is not defined in this part of the file;
# presumably it maps an opening bracket to its closing partner - confirm.
276 | sub find_matching_delimiter { | ||||
277 | my ( $self ) = @_; | ||||
278 | $self->{cursor_curr} ||= 0; | ||||
279 | my $start = substr | ||||
280 | $self->{content}, | ||||
281 | $self->{cursor_curr}, | ||||
282 | 1; | ||||
283 | |||||
284 | my $inx = $self->{cursor_curr}; | ||||
285 | my $finish = ( | ||||
286 | my $bracketed = $self->close_bracket( $start ) ) || $start; | ||||
287 | my $nest = 0; | ||||
288 | |||||
289 | while ( ++$inx < $self->{cursor_limit} ) { | ||||
290 | my $char = substr $self->{content}, $inx, 1; | ||||
# A back slash causes the following character to be skipped, unless the
# back slash is itself the closing delimiter.
291 | if ( $char eq '\\' && $finish ne '\\' ) { | ||||
292 | ++$inx; | ||||
# Bracketing delimiters nest: each further opener needs its own closer.
293 | } elsif ( $bracketed && $char eq $start ) { | ||||
294 | ++$nest; | ||||
295 | } elsif ( $char eq $finish ) { | ||||
296 | --$nest < 0 | ||||
297 | and return $inx - $self->{cursor_curr}; | ||||
298 | } | ||||
299 | } | ||||
300 | |||||
301 | return; | ||||
302 | } | ||||
303 | |||||
# Match the given Regexp object against $self->{find}, the portion of
# the content not yet made into tokens (computed by _remainder() if it
# is not already set).
# On success the whole match is stored for match() and the capture
# groups for capture(); groups that did not participate are stored as
# undef. The return value is the end offset of the match in scalar
# context, or ( start offset, end offset ) in list context, both
# relative to the start of $self->{find}.
# On failure returns nothing and leaves any stored match and captures
# untouched. Calls confess() if ref() of the argument is not 'Regexp'.
304 | sub find_regexp { | ||||
305 | my ( $self, $regexp ) = @_; | ||||
306 | |||||
307 | ref $regexp eq 'Regexp' | ||||
308 | or confess | ||||
309 | 'Argument is a ', ( ref $regexp || 'scalar' ), ' not a Regexp'; | ||||
310 | |||||
311 | defined $self->{find} or $self->_remainder(); | ||||
312 | |||||
313 | $self->{find} =~ $regexp | ||||
314 | or return; | ||||
315 | |||||
# Element 0 of @capture is the whole match; it is shifted off below.
316 | my @capture; | ||||
317 | foreach my $inx ( 0 .. $#+ ) { | ||||
318 | if ( defined $-[$inx] && defined $+[$inx] ) { | ||||
# NOTE(review): the assignment to $self->{capture} here looks redundant,
# since the slot is overwritten after the loop.
319 | push @capture, $self->{capture} = substr | ||||
320 | $self->{find}, | ||||
321 | $-[$inx], | ||||
322 | $+[$inx] - $-[$inx]; | ||||
323 | } else { | ||||
324 | push @capture, undef; | ||||
325 | } | ||||
326 | } | ||||
327 | $self->{match} = shift @capture; | ||||
328 | $self->{capture} = \@capture; | ||||
329 | |||||
330 | # The following circumlocution seems to be needed under Perl 5.13.0 | ||||
331 | # for reasons I do not fathom -- at least in the case where | ||||
332 | # wantarray is false. RT 56864 details the symptoms, which I was | ||||
333 | # never able to reproduce outside Perl::Critic. But returning $+[0] | ||||
334 | # directly, the value could transmogrify between here and the | ||||
335 | # calling module. | ||||
336 | ## my @data = ( $-[0], $+[0] ); | ||||
337 | ## return wantarray ? @data : $data[1]; | ||||
338 | return wantarray ? ( $-[0] + 0, $+[0] + 0 ) : $+[0] + 0; | ||||
339 | } | ||||
340 | |||||
# Return the starting delimiter recorded by __PPIX_TOKENIZER__init, or
# undef if it has not been recorded yet.
341 | sub get_start_delimiter { | ||||
342 | my ( $self ) = @_; | ||||
343 | return $self->{delimiter_start}; | ||||
344 | } | ||||
345 | |||||
# Produce the next token or tokens by calling the handler for the
# current mode, '__PPIX_TOKENIZER__' . $self->{mode}, as a class method
# of this package, passing the tokenizer and the character at the
# cursor. Returns whatever the handler returns.
# When called from outside this package the cursor must be beyond
# cursor_orig, otherwise confess() is called.
# NOTE(review): nothing in this part of the file assigns cursor_orig
# after new() sets it to undef - confirm where it is maintained.
346 | sub get_token { | ||||
347 | my ( $self ) = @_; | ||||
348 | |||||
349 | caller eq __PACKAGE__ or $self->{cursor_curr} > $self->{cursor_orig} | ||||
350 | or confess 'Programming error - get_token() called without ', | ||||
351 | 'first calling make_token()'; | ||||
352 | |||||
353 | my $handler = '__PPIX_TOKENIZER__' . $self->{mode}; | ||||
354 | |||||
355 | my $character = substr( | ||||
356 | $self->{content}, | ||||
357 | $self->{cursor_curr}, | ||||
358 | 1 | ||||
359 | ); | ||||
360 | |||||
361 | return ( __PACKAGE__->$handler( $self, $character ) ); | ||||
362 | } | ||||
363 | |||||
# Return true unless the starting delimiter is a single quote.
# NOTE(review): delimiter_start is undef until __PPIX_TOKENIZER__init
# has run, so calling this earlier compares against undef - confirm that
# callers never do so.
364 | sub interpolates { | ||||
365 | my ( $self ) = @_; | ||||
366 | return $self->{delimiter_start} ne q{'}; | ||||
367 | } | ||||
368 | |||||
# Make a token from the $length characters starting at the cursor.
#   $length - number of characters. If the token would run past
#             cursor_limit it is shortened to fit; if that leaves
#             nothing, returns nothing.
#   $class  - token class; defaults to the calling package. The prefix
#             'PPIx::Regexp::' is added if absent.
#   $arg    - passed through to the token's __PPIX_TOKEN__post_make().
# Returns the token, or nothing if the class's _new() returns false.
# Side effects: clears the current expectation if the token is
# significant, counts a failure if the token isa TOKEN_UNKNOWN, advances
# the cursor by $length, discards the find_regexp() state, runs every
# cookie, and records a significant token as the prior token.
369 | sub make_token { | ||||
370 | my ( $self, $length, $class, $arg ) = @_; | ||||
371 | defined $class or $class = caller; | ||||
372 | |||||
373 | if ( $length + $self->{cursor_curr} > $self->{cursor_limit} ) { | ||||
374 | $length = $self->{cursor_limit} - $self->{cursor_curr} | ||||
375 | or return; | ||||
376 | } | ||||
377 | |||||
378 | $class =~ m/ \A PPIx::Regexp:: /smx | ||||
379 | or $class = 'PPIx::Regexp::' . $class; | ||||
380 | my $content = substr | ||||
381 | $self->{content}, | ||||
382 | $self->{cursor_curr}, | ||||
383 | $length; | ||||
384 | |||||
385 | $self->{trace} | ||||
386 | and warn "make_token( $length, '$class' ) => '$content'\n"; | ||||
387 | $self->{trace} > 1 | ||||
388 | and warn " make_token: cursor_curr = $self->{cursor_curr}; ", | ||||
389 | "cursor_limit = $self->{cursor_limit}\n"; | ||||
390 | my $token = $class->_new( $content ) or return; | ||||
391 | $token->significant() and $self->{expect} = undef; | ||||
392 | $token->__PPIX_TOKEN__post_make( $self, $arg ); | ||||
393 | |||||
394 | $token->isa( TOKEN_UNKNOWN ) and $self->{failures}++; | ||||
395 | |||||
396 | $self->{cursor_curr} += $length; | ||||
397 | $self->{find} = undef; | ||||
398 | $self->{match} = undef; | ||||
399 | $self->{capture} = undef; | ||||
400 | |||||
# Each cookie is called with the tokenizer and the new token; a cookie
# that returns false is removed.
401 | foreach my $name ( keys %{ $self->{cookie} } ) { | ||||
402 | my $cookie = $self->{cookie}{$name}; | ||||
403 | $cookie->( $self, $token ) | ||||
404 | or delete $self->{cookie}{$name}; | ||||
405 | } | ||||
406 | |||||
407 | # Record this token as the prior token if it is significant. We must | ||||
408 | # do this after processing cookies, so that the cookies have access | ||||
409 | # to the old token if they want. | ||||
410 | $token->significant() | ||||
411 | and $self->{prior} = $token; | ||||
412 | |||||
413 | return $token; | ||||
414 | } | ||||
415 | |||||
# Return the text matched by the last successful find_regexp(), or undef
# if none is stored (make_token() discards it).
416 | sub match { | ||||
417 | my ( $self ) = @_; | ||||
418 | return $self->{match}; | ||||
419 | } | ||||
420 | |||||
# Return the value stored for the named modifier in the last (innermost)
# hash on the modifier stack.
421 | sub modifier { | ||||
422 | my ( $self, $modifier ) = @_; | ||||
423 | return $self->{modifiers}[-1]{$modifier}; | ||||
424 | } | ||||
425 | |||||
# Push a shallow copy of the innermost modifier hash onto the modifier
# stack. Returns nothing.
426 | sub modifier_duplicate { | ||||
427 | my ( $self ) = @_; | ||||
428 | push @{ $self->{modifiers} }, | ||||
429 | { %{ $self->{modifiers}[-1] } }; | ||||
430 | return; | ||||
431 | } | ||||
432 | |||||
# Replace the innermost modifier hash with the result of calling
# PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify()
# with the current innermost hash and a reference to a hash of the
# name/value arguments. Returns nothing.
433 | sub modifier_modify { | ||||
434 | my ( $self, %args ) = @_; | ||||
435 | |||||
436 | # Modifier code is centralized in PPIx::Regexp::Token::Modifier | ||||
437 | $self->{modifiers}[-1] = | ||||
438 | PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify( | ||||
439 | $self->{modifiers}[-1], \%args ); | ||||
440 | |||||
441 | return; | ||||
442 | |||||
443 | } | ||||
444 | |||||
# Remove the innermost modifier hash from the modifier stack, unless it
# is the only one left. Returns nothing.
445 | sub modifier_pop { | ||||
446 | my ( $self ) = @_; | ||||
447 | @{ $self->{modifiers} } > 1 | ||||
448 | and pop @{ $self->{modifiers} }; | ||||
449 | return; | ||||
450 | } | ||||
451 | |||||
# Return the next token, or nothing when there are no more.
# Tokens already made are handed out from $self->{pending} first. When
# that is empty, get_token() is called to refill it and the block is
# restarted with redo. If get_token() returns no tokens, nothing is
# returned.
452 | sub next_token { | ||||
453 | my ( $self ) = @_; | ||||
454 | |||||
455 | { | ||||
456 | |||||
457 | if ( @{ $self->{pending} } ) { | ||||
458 | return shift @{ $self->{pending} }; | ||||
459 | } | ||||
460 | |||||
# The cursor has reached the limit. If the limit is the end of the
# content, or 'finish' mode has already been entered, we are done.
# Otherwise enter 'finish' mode and extend the limit by one character.
461 | if ( $self->{cursor_curr} >= $self->{cursor_limit} ) { | ||||
462 | $self->{cursor_limit} >= length $self->{content} | ||||
463 | and return; | ||||
464 | $self->{mode} eq 'finish' and return; | ||||
465 | $self->{mode} = 'finish'; | ||||
466 | $self->{cursor_limit}++; | ||||
467 | } | ||||
468 | |||||
469 | if ( my @tokens = $self->get_token() ) { | ||||
470 | push @{ $self->{pending} }, @tokens; | ||||
471 | redo; | ||||
472 | |||||
473 | } | ||||
474 | |||||
475 | } | ||||
476 | |||||
477 | return; | ||||
478 | |||||
479 | } | ||||
480 | |||||
# Return the character $offset characters beyond the cursor (default 0)
# without moving the cursor. Returns nothing if $offset is negative, or
# if the position is at or beyond cursor_limit.
481 | sub peek { | ||||
482 | my ( $self, $offset ) = @_; | ||||
483 | defined $offset or $offset = 0; | ||||
484 | $offset < 0 and return; | ||||
485 | $offset += $self->{cursor_curr}; | ||||
486 | $offset >= $self->{cursor_limit} and return; | ||||
487 | return substr $self->{content}, $offset, 1; | ||||
488 | } | ||||
489 | |||||
# Return the result of PPI::Document->new() called on a reference to a
# copy of $self->{find}, the portion of the content not yet made into
# tokens (computed by _remainder() if it is not already set).
490 | sub ppi_document { | ||||
491 | my ( $self ) = @_; | ||||
492 | |||||
493 | defined $self->{find} or $self->_remainder(); | ||||
494 | |||||
495 | return PPI::Document->new( \"$self->{find}" ); | ||||
496 | } | ||||
497 | |||||
# With no method name, return the prior significant token as recorded by
# make_token() (new() initializes it to TOKEN_UNKNOWN). With a method
# name, call that method on the prior token with any further arguments
# and return the result. Calls confess() if the prior token can not do
# the method.
498 | sub prior { | ||||
499 | my ( $self, $method, @args ) = @_; | ||||
500 | defined $method or return $self->{prior}; | ||||
501 | $self->{prior}->can( $method ) | ||||
502 | or confess 'Programming error - ', | ||||
503 | ( ref $self->{prior} || $self->{prior} ), | ||||
504 | ' does not support method ', $method; | ||||
505 | return $self->{prior}->$method( @args ); | ||||
506 | } | ||||
507 | |||||
# Always returns 1; the arguments are ignored.
508 | sub significant { | ||||
509 | return 1; | ||||
510 | } | ||||
511 | |||||
# Return a list of all remaining tokens, collected by calling
# next_token() until it returns a false value.
512 | sub tokens { | ||||
513 | my ( $self ) = @_; | ||||
514 | |||||
515 | my @rslt; | ||||
516 | while ( my $token = $self->next_token() ) { | ||||
517 | push @rslt, $token; | ||||
518 | } | ||||
519 | |||||
520 | return @rslt; | ||||
521 | } | ||||
522 | |||||
# Set $self->{find} to the portion of the content from the cursor up to
# but not including cursor_limit. Calls confess() if the cursor is
# already beyond the limit. Returns nothing.
523 | sub _remainder { | ||||
524 | my ( $self ) = @_; | ||||
525 | |||||
526 | $self->{cursor_curr} > $self->{cursor_limit} | ||||
527 | and confess "Programming error - Trying to find past end of string"; | ||||
528 | $self->{find} = substr( | ||||
529 | $self->{content}, | ||||
530 | $self->{cursor_curr}, | ||||
531 | $self->{cursor_limit} - $self->{cursor_curr} | ||||
532 | ); | ||||
533 | |||||
534 | return; | ||||
535 | } | ||||
536 | |||||
# Handler for 'init' mode: tokenize the start of the regular expression.
# Recognizes optional leading white space, an optional operator ( qr, m
# or s ), optional white space, and an opening delimiter, which must be
# neither a word character nor white space. It also records the operator
# in $tokenizer->{type}, works out the trailing modifiers and the
# delimiters, sets cursor_limit to the end of the first delimited
# string, and switches to 'regexp' mode.
# Returns the tokens made: leading white space (if any), a Structure
# token for the operator, white space after the operator (if any), and
# the opening Delimiter.
# If the content does not start as described, a TOKEN_UNKNOWN token
# carrying an error message is made and returned instead, and the mode
# is left as 'kaput'.
537 | sub __PPIX_TOKENIZER__init { | ||||
538 | my ( $class, $tokenizer, $character ) = @_; | ||||
539 | |||||
# Assume failure; the mode is changed to 'regexp' only at the end.
540 | $tokenizer->{mode} = 'kaput'; | ||||
541 | $tokenizer->{content} =~ m/ \A \s* ( qr | m | s )? ( \s* ) ( [^\w\s] ) /smx | ||||
542 | or return $tokenizer->make_token( | ||||
543 | length( $tokenizer->{content} ), TOKEN_UNKNOWN, { | ||||
544 | error => 'Tokenizer found illegal first characters', | ||||
545 | }, | ||||
546 | ); | ||||
547 | # my ( $type, $white, $delim ) = ( $1, $2, $3 ); | ||||
548 | my ( $type, $white ) = ( $1, $2 ); | ||||
# Offset at which the first participating group begins; anything before
# it is leading white space.
549 | my $start_pos = defined $-[1] ? $-[1] : | ||||
550 | defined $-[2] ? $-[2] : | ||||
551 | defined $-[3] ? $-[3] : 0; | ||||
552 | |||||
553 | defined $type or $type = ''; | ||||
554 | $tokenizer->{type} = $type; | ||||
555 | |||||
556 | my @tokens; | ||||
557 | $start_pos | ||||
558 | and push @tokens, $tokenizer->make_token( $start_pos, | ||||
559 | 'PPIx::Regexp::Token::Whitespace' ); | ||||
560 | push @tokens, $tokenizer->make_token( length $type, | ||||
561 | 'PPIx::Regexp::Token::Structure' ); | ||||
562 | length $white > 0 | ||||
563 | and push @tokens, $tokenizer->make_token( length $white, | ||||
564 | 'PPIx::Regexp::Token::Whitespace' ); | ||||
565 | |||||
566 | { | ||||
567 | my @mods = @{ $tokenizer->{default_modifiers} }; | ||||
# Trailing lower-case letters are taken to be the modifiers, and are
# excluded from the portion still to be tokenized.
568 | if ( $tokenizer->{content} =~ m/ ( [[:lower:]]* ) \s* \z /smx ) { | ||||
569 | my $mod = $1; | ||||
570 | $tokenizer->{cursor_limit} -= length $mod; | ||||
571 | push @mods, $mod; | ||||
572 | } | ||||
573 | $tokenizer->{effective_modifiers} = | ||||
574 | PPIx::Regexp::Token::Modifier::__aggregate_modifiers ( | ||||
575 | @mods ); | ||||
576 | $tokenizer->{modifiers} = [ | ||||
577 | { %{ $tokenizer->{effective_modifiers} } }, | ||||
578 | ]; | ||||
579 | $tokenizer->{cursor_modifiers} = $tokenizer->{cursor_limit}; | ||||
580 | } | ||||
581 | |||||
582 | $tokenizer->{delimiter_start} = substr | ||||
583 | $tokenizer->{content}, | ||||
584 | $tokenizer->{cursor_curr}, | ||||
585 | 1; | ||||
586 | |||||
# For s the first string ends at the delimiter that matches the opening
# one; otherwise it ends at the character just before the modifiers.
587 | if ( $type eq 's' and my $offset = $tokenizer->find_matching_delimiter() ) { | ||||
588 | $tokenizer->{cursor_limit} = $tokenizer->{cursor_curr} + $offset; | ||||
589 | } else { | ||||
590 | $tokenizer->{cursor_limit} = $tokenizer->{cursor_modifiers} - 1; | ||||
591 | } | ||||
592 | |||||
593 | $tokenizer->{delimiter_finish} = substr | ||||
594 | $tokenizer->{content}, | ||||
595 | $tokenizer->{cursor_limit}, | ||||
596 | 1; | ||||
597 | $tokenizer->{delimiter_re} = undef; | ||||
598 | |||||
599 | push @tokens, $tokenizer->make_token( 1, | ||||
600 | 'PPIx::Regexp::Token::Delimiter' ); | ||||
601 | |||||
602 | $tokenizer->{mode} = 'regexp'; | ||||
603 | |||||
604 | return @tokens; | ||||
605 | } | ||||
606 | |||||
# Dispatcher for the pattern-scanning modes.
#
# Offers the current character to each class returned by the
# tokenizer's _known_tokenizers(), calling that class's
# '__PPIX_TOKENIZER__<mode>' method. The first class to return anything
# true wins: object results are passed through unchanged, and non-object
# results are handed to make_token() with the winning class as the
# token class.
#
# If no class claims the character, control passes to
# __PPIX_TOKEN_FALLBACK__<mode> in this package, or failing that to
# __PPIX_TOKEN_FALLBACK__regexp; lacking both is a programming error.
#
# Arguments: the invoking class, the tokenizer object, and the current
# character. Returns the list of tokens produced.
sub __PPIX_TOKENIZER__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    my $mode    = $tokenizer->{mode};
    my $handler = "__PPIX_TOKENIZER__$mode";

    # Remember where this attempt started.
    $tokenizer->{cursor_orig} = $tokenizer->{cursor_curr};

    foreach my $token_class ( $tokenizer->_known_tokenizers() ) {

        # Discard false returns; they mean "not mine".
        my @found = grep { $_ }
            $token_class->$handler( $tokenizer, $character );

        if ( $tokenizer->{trace} ) {
            warn $token_class, "->$handler( \$tokenizer, '$character' )",
                " => (@found)\n";
        }

        @found
            or next;

        return ( map {
                ref $_ ? $_ : $tokenizer->make_token( $_,
                    $token_class ) } @found );
    }

    # Nobody claimed the character, so find a fallback processor for it.
    my $fallback = __PACKAGE__->can( "__PPIX_TOKEN_FALLBACK__$mode" )
        || __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__regexp' )
        || confess "Programming error - unable to find fallback for $mode";

    return $fallback->( $class, $tokenizer, $character );
}

# Replacement mode is dispatched by exactly the same code.
*__PPIX_TOKENIZER__repl = \&__PPIX_TOKENIZER__regexp;
633 | |||||
# Last-resort handler for a character that nothing claimed while
# scanning the regular expression.
#
# A back slash with more than one character left before the cursor
# limit becomes a two-character TOKEN_LITERAL. Anything else becomes a
# one-character TOKEN_UNKNOWN carrying an error message.
#
# Arguments: the invoking class (unused), the tokenizer object, and the
# current character. Returns whatever make_token() returns.
sub __PPIX_TOKEN_FALLBACK__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    # True when we are looking at a back slash that has a character
    # after it to escape.
    my $is_escape = $character eq '\\'
        && ( $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr} ) > 1;

    # As a fallback in regexp mode, any escaped character is a literal.
    $is_escape
        and return $tokenizer->make_token( 2, TOKEN_LITERAL );

    # Any normal character is unknown.
    my %problem = ( error => 'Tokenizer found unexpected literal' );
    return $tokenizer->make_token( 1, TOKEN_UNKNOWN, \%problem );
}
650 | |||||
# Last-resort handler for a character that nothing claimed while
# scanning the replacement part of a substitution.
#
# Everything is a TOKEN_LITERAL here; the only question is its length.
# A back slash takes the following character with it (length 2) when
# there is a following character and either the tokenizer reports that
# the string interpolates, or that character is a single quote or
# another back slash. In every other case the literal is one character.
#
# Arguments: the invoking class (unused), the tokenizer object, and the
# current character. Returns whatever make_token() returns.
sub __PPIX_TOKEN_FALLBACK__repl {
    my ( $class, $tokenizer, $character ) = @_;

    my $length = 1;

    if ( $character eq '\\' ) {
        my $following = $tokenizer->peek( 1 );
        if ( defined $following
            && ( $tokenizer->interpolates()
                || $following eq q<'>
                || $following eq '\\' )
        ) {
            $length = 2;
        }
    }

    return $tokenizer->make_token( $length, TOKEN_LITERAL );
}
667 | |||||
# Handler for the 'finish' mode, entered when the scan reaches the
# closing delimiter of the section being tokenized.
#
# It always emits a Delimiter token for that closing delimiter. After
# that:
#
# * If the cursor has arrived at the start of the modifiers, the
#   expression is complete: a Modifier token is emitted, followed by a
#   Whitespace token if the content ends in white space, and the mode
#   becomes 'kaput'.
#
# * Otherwise there is a replacement section still to come. The cookies
#   are cleared and the scan limit is moved to just before the
#   modifiers. If close_bracket() reports a closing bracket for the
#   opening delimiter, optional white space and a fresh opening
#   delimiter are consumed first. With the 'e' modifier in force the
#   whole replacement becomes one Code token, followed by the final
#   Delimiter and Modifier tokens, and the mode becomes 'kaput';
#   without it the mode becomes 'repl'.
#
# Arguments: the invoking class (unused), the tokenizer object, and the
# current character (unused). Returns the list of tokens made.
# Confesses if the cursor limit lies beyond the end of the content.
sub __PPIX_TOKENIZER__finish {
    my ( $class, $tokenizer, $character ) = @_;

    $tokenizer->{cursor_limit} > length $tokenizer->{content}
        and confess "Programming error - ran off string";

    # The closing delimiter of the section just scanned.
    my @tokens = $tokenizer->make_token( 1,
        'PPIx::Regexp::Token::Delimiter' );

    # The cursors are numeric offsets, so they are compared numerically
    # rather than as strings.
    if ( $tokenizer->{cursor_curr} == $tokenizer->{cursor_modifiers} ) {

        # We are out of string. Make the modifier token and close up
        # shop.
        my $trailer;
        if ( $tokenizer->{content} =~ m/ \s+ \z /smx ) {
            # Trailing white space is not part of the modifiers.
            $tokenizer->{cursor_limit} = $-[0];
            # NOTE(review): this is measured from the cursor, which has
            # not yet passed the modifiers, so it is the length of the
            # modifiers plus the white space. Presumably make_token()
            # trims a request that overruns the cursor limit -- confirm.
            $trailer = length( $tokenizer->{content} ) -
                $tokenizer->{cursor_curr};
        }
        else {
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
        }
        push @tokens, $tokenizer->make_token(
            $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
            'PPIx::Regexp::Token::Modifier' );
        if ( $trailer ) {
            # Open the limit back up so the white space can be consumed.
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
            push @tokens, $tokenizer->make_token(
                $trailer, 'PPIx::Regexp::Token::Whitespace' );
        }
        $tokenizer->{mode} = 'kaput';

    }
    else {

        # Clear the cookies, because we are going around again.
        $tokenizer->{cookie} = {};

        # Move the cursor limit to just before the modifiers.
        $tokenizer->{cursor_limit} = $tokenizer->{cursor_modifiers} - 1;

        # If the preceding regular expression was bracketed, we need to
        # consume possible whitespace and find another delimiter.

        if ( $tokenizer->close_bracket( $tokenizer->{delimiter_start} ) ) {
            my $accept;
            $accept = $tokenizer->find_regexp( qr{ \A \s+ }smx )
                and push @tokens, $tokenizer->make_token(
                    $accept, 'PPIx::Regexp::Token::Whitespace' );

            # Given its own name so that it does not mask this
            # subroutine's $character argument.
            my $open_delimiter = $tokenizer->peek();
            $tokenizer->{delimiter_start} = $open_delimiter;
            push @tokens, $tokenizer->make_token(
                1, 'PPIx::Regexp::Token::Delimiter' );
            $tokenizer->{delimiter_finish} = substr
                $tokenizer->{content},
                $tokenizer->{cursor_limit} - 1,
                1;
            $tokenizer->{delimiter_re} = undef;
        }

        if ( $tokenizer->modifier( 'e' ) ) {
            # With /e, the replacement portion is code. We make it all
            # into one big PPIx::Regexp::Token::Code, slap on the
            # trailing delimiter and modifiers, and return it all.
            push @tokens, $tokenizer->make_token(
                $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
                'PPIx::Regexp::Token::Code',
                { perl_version_introduced => MINIMUM_PERL },
            );
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
            push @tokens, $tokenizer->make_token( 1,
                'PPIx::Regexp::Token::Delimiter' );
            push @tokens, $tokenizer->make_token(
                $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
                'PPIx::Regexp::Token::Modifier' );
            $tokenizer->{mode} = 'kaput';
        }
        else {
            # Put our mode to replacement.
            $tokenizer->{mode} = 'repl';
        }

    }

    return @tokens;

}
751 | |||||
752 | 1 | 5µs | 1; | ||
753 | |||||
754 | __END__ |