← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:13 2016

Filename/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Lexer.pm
StatementsExecuted 59 statements in 3.10ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1113.13ms24.1msPPIx::Regexp::Lexer::::BEGIN@61PPIx::Regexp::Lexer::BEGIN@61
1111.02ms1.21msPPIx::Regexp::Lexer::::BEGIN@44PPIx::Regexp::Lexer::BEGIN@44
111281┬Ás384┬ÁsPPIx::Regexp::Lexer::::BEGIN@58PPIx::Regexp::Lexer::BEGIN@58
111257┬Ás350┬ÁsPPIx::Regexp::Lexer::::BEGIN@55PPIx::Regexp::Lexer::BEGIN@55
111250┬Ás372┬ÁsPPIx::Regexp::Lexer::::BEGIN@49PPIx::Regexp::Lexer::BEGIN@49
111246┬Ás372┬ÁsPPIx::Regexp::Lexer::::BEGIN@46PPIx::Regexp::Lexer::BEGIN@46
111227┬Ás318┬ÁsPPIx::Regexp::Lexer::::BEGIN@51PPIx::Regexp::Lexer::BEGIN@51
111193┬Ás286┬ÁsPPIx::Regexp::Lexer::::BEGIN@48PPIx::Regexp::Lexer::BEGIN@48
111192┬Ás299┬ÁsPPIx::Regexp::Lexer::::BEGIN@47PPIx::Regexp::Lexer::BEGIN@47
111184┬Ás317┬ÁsPPIx::Regexp::Lexer::::BEGIN@53PPIx::Regexp::Lexer::BEGIN@53
111178┬Ás270┬ÁsPPIx::Regexp::Lexer::::BEGIN@52PPIx::Regexp::Lexer::BEGIN@52
111162┬Ás259┬ÁsPPIx::Regexp::Lexer::::BEGIN@54PPIx::Regexp::Lexer::BEGIN@54
111152┬Ás632┬ÁsPPIx::Regexp::Lexer::::BEGIN@60PPIx::Regexp::Lexer::BEGIN@60
111138┬Ás236┬ÁsPPIx::Regexp::Lexer::::BEGIN@59PPIx::Regexp::Lexer::BEGIN@59
111133┬Ás230┬ÁsPPIx::Regexp::Lexer::::BEGIN@50PPIx::Regexp::Lexer::BEGIN@50
111131┬Ás230┬ÁsPPIx::Regexp::Lexer::::BEGIN@45PPIx::Regexp::Lexer::BEGIN@45
111130┬Ás228┬ÁsPPIx::Regexp::Lexer::::BEGIN@57PPIx::Regexp::Lexer::BEGIN@57
111129┬Ás221┬ÁsPPIx::Regexp::Lexer::::BEGIN@43PPIx::Regexp::Lexer::BEGIN@43
111114┬Ás204┬ÁsPPIx::Regexp::Lexer::::BEGIN@56PPIx::Regexp::Lexer::BEGIN@56
11112┬Ás24┬ÁsPPIx::Regexp::Lexer::::BEGIN@36PPIx::Regexp::Lexer::BEGIN@36
1118┬Ás30┬ÁsPPIx::Regexp::Lexer::::BEGIN@62PPIx::Regexp::Lexer::BEGIN@62
1118┬Ás12┬ÁsPPIx::Regexp::Lexer::::BEGIN@37PPIx::Regexp::Lexer::BEGIN@37
1117┬Ás29┬ÁsPPIx::Regexp::Lexer::::BEGIN@41PPIx::Regexp::Lexer::BEGIN@41
1117┬Ás27┬ÁsPPIx::Regexp::Lexer::::BEGIN@42PPIx::Regexp::Lexer::BEGIN@42
1117┬Ás468┬ÁsPPIx::Regexp::Lexer::::BEGIN@39PPIx::Regexp::Lexer::BEGIN@39
0000s0sPPIx::Regexp::Lexer::::_curlyPPIx::Regexp::Lexer::_curly
0000s0sPPIx::Regexp::Lexer::::_finalizePPIx::Regexp::Lexer::_finalize
0000s0sPPIx::Regexp::Lexer::::_get_delimitedPPIx::Regexp::Lexer::_get_delimited
0000s0sPPIx::Regexp::Lexer::::_get_tokenPPIx::Regexp::Lexer::_get_token
0000s0sPPIx::Regexp::Lexer::::_in_regex_setPPIx::Regexp::Lexer::_in_regex_set
0000s0sPPIx::Regexp::Lexer::::_make_nodePPIx::Regexp::Lexer::_make_node
0000s0sPPIx::Regexp::Lexer::::_recover_curlyPPIx::Regexp::Lexer::_recover_curly
0000s0sPPIx::Regexp::Lexer::::_recover_curly_quantifiersPPIx::Regexp::Lexer::_recover_curly_quantifiers
0000s0sPPIx::Regexp::Lexer::::_regex_setPPIx::Regexp::Lexer::_regex_set
0000s0sPPIx::Regexp::Lexer::::_roundPPIx::Regexp::Lexer::_round
0000s0sPPIx::Regexp::Lexer::::_squarePPIx::Regexp::Lexer::_square
0000s0sPPIx::Regexp::Lexer::::_unget_tokenPPIx::Regexp::Lexer::_unget_token
0000s0sPPIx::Regexp::Lexer::::errstrPPIx::Regexp::Lexer::errstr
0000s0sPPIx::Regexp::Lexer::::failuresPPIx::Regexp::Lexer::failures
0000s0sPPIx::Regexp::Lexer::::lexPPIx::Regexp::Lexer::lex
0000s0sPPIx::Regexp::Lexer::::newPPIx::Regexp::Lexer::new
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1=head1 NAME
2
3PPIx::Regexp::Lexer - Assemble tokenizer output.
4
5=head1 SYNOPSIS
6
7 use PPIx::Regexp::Lexer;
8 use PPIx::Regexp::Dumper;
9 my $lex = PPIx::Regexp::Lexer->new('qr{foo}smx');
10 my $dmp = PPIx::Regexp::Dumper->new( $lex );
11 $dmp->print();
12
13=head1 INHERITANCE
14
15C<PPIx::Regexp::Lexer> is a
16L<PPIx::Regexp::Support|PPIx::Regexp::Support>.
17
18C<PPIx::Regexp::Lexer> has no descendants.
19
20=head1 DESCRIPTION
21
22This class takes the token stream generated by
23L<PPIx::Regexp::Tokenizer|PPIx::Regexp::Tokenizer> and generates the
24parse tree.
25
26=head1 METHODS
27
28This class provides the following public methods. Methods not documented
29here are private, and unsupported in the sense that the author reserves
30the right to change or remove them without notice.
31
32=cut
33
34package PPIx::Regexp::Lexer;
35
36221┬Ás236┬Ás
# spent 24┬Ás (12+12) within PPIx::Regexp::Lexer::BEGIN@36 which was called: # once (12┬Ás+12┬Ás) by PPIx::Regexp::BEGIN@90 at line 36
use strict;
# spent 24┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@36 # spent 12┬Ás making 1 call to strict::import
37224┬Ás217┬Ás
# spent 12┬Ás (8+5) within PPIx::Regexp::Lexer::BEGIN@37 which was called: # once (8┬Ás+5┬Ás) by PPIx::Regexp::BEGIN@90 at line 37
use warnings;
# spent 12┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@37 # spent 5┬Ás making 1 call to warnings::import
38
39223┬Ás2929┬Ás
# spent 468┬Ás (7+461) within PPIx::Regexp::Lexer::BEGIN@39 which was called: # once (7┬Ás+461┬Ás) by PPIx::Regexp::BEGIN@90 at line 39
use base qw{ PPIx::Regexp::Support };
# spent 468┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@39 # spent 461┬Ás making 1 call to base::import
40
41221┬Ás250┬Ás
# spent 29┬Ás (7+21) within PPIx::Regexp::Lexer::BEGIN@41 which was called: # once (7┬Ás+21┬Ás) by PPIx::Regexp::BEGIN@90 at line 41
use Carp qw{ confess };
# spent 29┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@41 # spent 21┬Ás making 1 call to Exporter::import
42218┬Ás248┬Ás
# spent 27┬Ás (7+20) within PPIx::Regexp::Lexer::BEGIN@42 which was called: # once (7┬Ás+20┬Ás) by PPIx::Regexp::BEGIN@90 at line 42
use PPIx::Regexp::Constant qw{ TOKEN_LITERAL TOKEN_UNKNOWN };
# spent 27┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@42 # spent 20┬Ás making 1 call to Exporter::import
43290┬Ás1221┬Ás
# spent 221┬Ás (129+92) within PPIx::Regexp::Lexer::BEGIN@43 which was called: # once (129┬Ás+92┬Ás) by PPIx::Regexp::BEGIN@90 at line 43
use PPIx::Regexp::Node::Range ();
# spent 221┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@43
44297┬Ás11.21ms
# spent 1.21ms (1.02+195┬Ás) within PPIx::Regexp::Lexer::BEGIN@44 which was called: # once (1.02ms+195┬Ás) by PPIx::Regexp::BEGIN@90 at line 44
use PPIx::Regexp::Structure ();
# spent 1.21ms making 1 call to PPIx::Regexp::Lexer::BEGIN@44
45291┬Ás1230┬Ás
# spent 230┬Ás (131+99) within PPIx::Regexp::Lexer::BEGIN@45 which was called: # once (131┬Ás+99┬Ás) by PPIx::Regexp::BEGIN@90 at line 45
use PPIx::Regexp::Structure::Assertion ();
# spent 230┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@45
46283┬Ás1372┬Ás
# spent 372┬Ás (246+125) within PPIx::Regexp::Lexer::BEGIN@46 which was called: # once (246┬Ás+125┬Ás) by PPIx::Regexp::BEGIN@90 at line 46
use PPIx::Regexp::Structure::BranchReset ();
# spent 372┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@46
47292┬Ás1299┬Ás
# spent 299┬Ás (192+107) within PPIx::Regexp::Lexer::BEGIN@47 which was called: # once (192┬Ás+107┬Ás) by PPIx::Regexp::BEGIN@90 at line 47
use PPIx::Regexp::Structure::Code ();
# spent 299┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@47
48289┬Ás1286┬Ás
# spent 286┬Ás (193+93) within PPIx::Regexp::Lexer::BEGIN@48 which was called: # once (193┬Ás+93┬Ás) by PPIx::Regexp::BEGIN@90 at line 48
use PPIx::Regexp::Structure::Capture ();
# spent 286┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@48
49291┬Ás1372┬Ás
# spent 372┬Ás (250+123) within PPIx::Regexp::Lexer::BEGIN@49 which was called: # once (250┬Ás+123┬Ás) by PPIx::Regexp::BEGIN@90 at line 49
use PPIx::Regexp::Structure::CharClass ();
# spent 372┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@49
50292┬Ás1230┬Ás
# spent 230┬Ás (133+97) within PPIx::Regexp::Lexer::BEGIN@50 which was called: # once (133┬Ás+97┬Ás) by PPIx::Regexp::BEGIN@90 at line 50
use PPIx::Regexp::Structure::Subexpression ();
# spent 230┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@50
51284┬Ás1318┬Ás
# spent 318┬Ás (227+91) within PPIx::Regexp::Lexer::BEGIN@51 which was called: # once (227┬Ás+91┬Ás) by PPIx::Regexp::BEGIN@90 at line 51
use PPIx::Regexp::Structure::Main ();
# spent 318┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@51
52285┬Ás1270┬Ás
# spent 270┬Ás (178+91) within PPIx::Regexp::Lexer::BEGIN@52 which was called: # once (178┬Ás+91┬Ás) by PPIx::Regexp::BEGIN@90 at line 52
use PPIx::Regexp::Structure::Modifier ();
# spent 270┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@52
53292┬Ás1317┬Ás
# spent 317┬Ás (184+133) within PPIx::Regexp::Lexer::BEGIN@53 which was called: # once (184┬Ás+133┬Ás) by PPIx::Regexp::BEGIN@90 at line 53
use PPIx::Regexp::Structure::NamedCapture ();
# spent 317┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@53
54282┬Ás1259┬Ás
# spent 259┬Ás (162+97) within PPIx::Regexp::Lexer::BEGIN@54 which was called: # once (162┬Ás+97┬Ás) by PPIx::Regexp::BEGIN@90 at line 54
use PPIx::Regexp::Structure::Quantifier ();
# spent 259┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@54
55283┬Ás1350┬Ás
# spent 350┬Ás (257+93) within PPIx::Regexp::Lexer::BEGIN@55 which was called: # once (257┬Ás+93┬Ás) by PPIx::Regexp::BEGIN@90 at line 55
use PPIx::Regexp::Structure::Regexp ();
# spent 350┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@55
56274┬Ás1204┬Ás
# spent 204┬Ás (114+91) within PPIx::Regexp::Lexer::BEGIN@56 which was called: # once (114┬Ás+91┬Ás) by PPIx::Regexp::BEGIN@90 at line 56
use PPIx::Regexp::Structure::RegexSet ();
# spent 204┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@56
57285┬Ás1228┬Ás
# spent 228┬Ás (130+97) within PPIx::Regexp::Lexer::BEGIN@57 which was called: # once (130┬Ás+97┬Ás) by PPIx::Regexp::BEGIN@90 at line 57
use PPIx::Regexp::Structure::Replacement ();
# spent 228┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@57
58286┬Ás1384┬Ás
# spent 384┬Ás (281+103) within PPIx::Regexp::Lexer::BEGIN@58 which was called: # once (281┬Ás+103┬Ás) by PPIx::Regexp::BEGIN@90 at line 58
use PPIx::Regexp::Structure::Switch ();
# spent 384┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@58
59297┬Ás1236┬Ás
# spent 236┬Ás (138+98) within PPIx::Regexp::Lexer::BEGIN@59 which was called: # once (138┬Ás+98┬Ás) by PPIx::Regexp::BEGIN@90 at line 59
use PPIx::Regexp::Structure::Unknown ();
# spent 236┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@59
602105┬Ás1632┬Ás
# spent 632┬Ás (152+480) within PPIx::Regexp::Lexer::BEGIN@60 which was called: # once (152┬Ás+480┬Ás) by PPIx::Regexp::BEGIN@90 at line 60
use PPIx::Regexp::Token::Unmatched ();
# spent 632┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@60
61282┬Ás124.1ms
# spent 24.1ms (3.13+20.9) within PPIx::Regexp::Lexer::BEGIN@61 which was called: # once (3.13ms+20.9ms) by PPIx::Regexp::BEGIN@90 at line 61
use PPIx::Regexp::Tokenizer ();
# spent 24.1ms making 1 call to PPIx::Regexp::Lexer::BEGIN@61
6221.30ms252┬Ás
# spent 30┬Ás (8+22) within PPIx::Regexp::Lexer::BEGIN@62 which was called: # once (8┬Ás+22┬Ás) by PPIx::Regexp::BEGIN@90 at line 62
use PPIx::Regexp::Util qw{ __instance };
# spent 30┬Ás making 1 call to PPIx::Regexp::Lexer::BEGIN@62 # spent 22┬Ás making 1 call to Exporter::import
63
641700nsour $VERSION = '0.036';
65
66=head2 new
67
68This method instantiates the lexer. It takes as its argument either a
69L<PPIx::Regexp::Tokenizer|PPIx::Regexp::Tokenizer> or the text to be
70parsed. In the latter case the tokenizer is instantiated from the text.
71
72Any optional name/value pairs after the first argument are passed to the
73tokenizer, which interprets them or not as the case may be.
74
75=cut
76
77{
78
792700ns my $errstr;
80
81 sub new {
82 my ( $class, $tokenizer, %args ) = @_;
83 ref $class and $class = ref $class;
84
85 __instance( $tokenizer, 'PPIx::Regexp::Tokenizer' )
86 or $tokenizer = PPIx::Regexp::Tokenizer->new( $tokenizer, %args )
87 or do {
88 $errstr = PPIx::Regexp::Tokenizer->errstr();
89 return;
90 };
91
92 my $self = {
93 deferred => [], # Deferred tokens
94 failures => 0,
95 tokenizer => $tokenizer,
96 };
97
98 bless $self, $class;
99 return $self;
100 }
101
102 sub errstr {
103 return $errstr;
104 }
105
106}
107
108=head2 errstr
109
110This method returns the error string from the last attempt to
111instantiate a C<PPIx::Regexp::Lexer>. If the last attempt succeeded, the
112error will be C<undef>.
113
114=cut
115
116# Defined above
117
118=head2 failures
119
120 print $lexer->failures(), " parse failures\n";
121
122This method returns the number of parse failures encountered. A
123parse failure is either a tokenization failure (see
124L<< PPIx::Regexp::Tokenizer->failures()|PPIx::Regexp::Tokenizer/failures >>)
125or a structural error.
126
127=cut
128
129sub failures {
130 my ( $self ) = @_;
131 return $self->{failures};
132}
133
134=head2 lex
135
136This method lexes the tokens in the text, and returns the lexed list of
137elements.
138
139=cut
140
141sub lex {
142 my ( $self ) = @_;
143
144 my @content;
145 $self->{failures} = 0;
146
147 # Accept everything up to the first delimiter.
148 {
149 my $token = $self->_get_token()
150 or return $self->_finalize( @content );
151 $token->isa( 'PPIx::Regexp::Token::Delimiter' ) or do {
152 push @content, $token;
153 redo;
154 };
155 $self->_unget_token( $token );
156 }
157
158 # Accept the first delimited structure.
159 push @content, ( my $regexp = $self->_get_delimited(
160 'PPIx::Regexp::Structure::Regexp' ) );
161
162 # If we are a substitution ...
163 if ( $content[0]->content() eq 's' ) {
164
165 # Accept any insignificant stuff.
166 while ( my $token = $self->_get_token() ) {
167 if ( $token->significant() ) {
168 $self->_unget_token( $token );
169 last;
170 } else {
171 push @content, $token;
172 }
173 }
174
175 # Figure out if we should expect an opening bracket.
176 my $expect_open_bracket = $self->close_bracket(
177 $regexp->start( 0 ) ) || 0;
178
179 # Accept the next delimited structure.
180 push @content, $self->_get_delimited(
181 'PPIx::Regexp::Structure::Replacement',
182 $expect_open_bracket,
183 );
184 }
185
186 # Accept the modifiers (we hope!) plus any trailing white space.
187 while ( my $token = $self->_get_token() ) {
188 push @content, $token;
189 }
190
191 # Let all the elements finalize themselves, recording any additional
192 # errors as they do so.
193 $self->_finalize( @content );
194
195 # If we found a regular expression (and we should have done so) ...
196 if ( $regexp ) {
197
198 # Retrieve the maximum capture group.
199 my $max_capture = $regexp->max_capture_number();
200
201 # Hashify the known capture names
202 my $capture_name = {
203 map { $_ => 1 } $regexp->capture_names(),
204 };
205
206 # For all the backreferences found
207 foreach my $elem ( @{ $regexp->find(
208 'PPIx::Regexp::Token::Backreference' ) || [] } ) {
209 # Rebless them as needed, recording any errors found.
210 $self->{failures} +=
211 $elem->__PPIX_LEXER__rebless(
212 capture_name => $capture_name,
213 max_capture => $max_capture,
214 );
215 }
216 }
217
218 return @content;
219
220}
221
222# Finalize the content array, updating the parse failures count as we
223# go.
224sub _finalize {
225 my ( $self, @content ) = @_;
226 foreach my $elem ( @content ) {
227 $self->{failures} += $elem->__PPIX_LEXER__finalize();
228 }
229 defined wantarray and return @content;
230 return;
231}
232
233{
234
23522┬Ás my %bracket = (
236 '{' => '}',
237 '(' => ')',
238 '[' => ']',
239 '(?[' => '])',
240 ## '<' => '>',
241 );
242
24311┬Ás my %unclosed = (
244 '{' => '_recover_curly',
245 );
246
247 sub _get_delimited {
248 my ( $self, $class, $expect_open_bracket ) = @_;
249 defined $expect_open_bracket or $expect_open_bracket = 1;
250
251 my @rslt;
252 $self->{_rslt} = \@rslt;
253
254 if ( $expect_open_bracket ) {
255 if ( my $token = $self->_get_token() ) {
256 push @rslt, [];
257 if ( $token->isa( 'PPIx::Regexp::Token::Delimiter' ) ) {
258 push @{ $rslt[-1] }, '', $token;
259 } else {
260 push @{ $rslt[-1] }, '', undef;
261 $self->_unget_token( $token );
262 }
263 } else {
264 return;
265 }
266 } else {
267 push @rslt, [ '', undef ];
268 }
269
270 while ( my $token = $self->_get_token() ) {
271 if ( $token->isa( 'PPIx::Regexp::Token::Delimiter' ) ) {
272 $self->_unget_token( $token );
273 last;
274 }
275 if ( $token->isa( 'PPIx::Regexp::Token::Structure' ) ) {
276 my $content = $token->content();
277
278 if ( my $finish = $bracket{$content} ) {
279 # Open bracket
280 push @rslt, [ $finish, $token ];
281
282 } elsif ( $content eq $rslt[-1][0] ) {
283
284 # Matched close bracket
285 $self->_make_node( $token );
286
287 } elsif ( $content ne ')' ) {
288
289 # If the close bracket is not a parenthesis, it becomes
290 # a literal.
291 bless $token, TOKEN_LITERAL;
292 push @{ $rslt[-1] }, $token;
293
294 } elsif ( $content eq ')'
295 and @rslt > 1 # Ignore enclosing delimiter
296 and my $recover = $unclosed{$rslt[-1][1]->content()} ) {
297 # If the close bracket is a parenthesis and there is a
298 # recovery procedure, we use it.
299 $self->$recover( $token );
300
301 } else {
302
303 # Unmatched close with no recovery.
304 $self->{failures}++;
305 bless $token, 'PPIx::Regexp::Token::Unmatched';
306 push @{ $rslt[-1] }, $token;
307 }
308
309 } else {
310 push @{ $rslt[-1] }, $token;
311 }
312
313 # We have to hand-roll the Range object.
314 if ( __instance( $rslt[-1][-2], 'PPIx::Regexp::Token::Operator' )
315 && $rslt[-1][-2]->content() eq '-'
316 && $rslt[-1][0] eq ']' # It's a character class
317 ) {
318 my @tokens = splice @{ $rslt[-1] }, -3;
319 push @{ $rslt[-1] },
320 PPIx::Regexp::Node::Range->_new( @tokens );
321 }
322 }
323
324 while ( @rslt > 1 ) {
325 if ( my $recover = $unclosed{$rslt[-1][1]->content()} ) {
326 $self->$recover();
327 } else {
328 $self->{failures}++;
329 $self->_make_node( undef );
330 }
331 }
332
333 if ( @rslt == 1 ) {
334 my @last = @{ pop @rslt };
335 shift @last;
336 push @last, $self->_get_token();
337 return $class->_new( @last );
338 } else {
339 confess "Missing data";
340 }
341
342 }
343
344}
345
346# $token = $self->_get_token();
347#
348# This method returns the next token from the tokenizer.
349
350sub _get_token {
351 my ( $self ) = @_;
352
353 if ( @{ $self->{deferred} } ) {
354 return shift @{ $self->{deferred} };
355 }
356
357 my $token = $self->{tokenizer}->next_token() or return;
358
359 return $token;
360}
361
362{
363
36421┬Ás my %handler = (
365 '(' => '_round',
366 '[' => '_square',
367 '{' => '_curly',
368 '(?[' => '_regex_set',
369 );
370
371 sub _make_node {
372 my ( $self, $token ) = @_;
373 my @args = @{ pop @{ $self->{_rslt} } };
374 shift @args;
375 push @args, $token;
376 my @node;
377 if ( my $method = $handler{ $args[0]->content() } ) {
378 @node = $self->$method( \@args );
379 }
380 @node or @node = PPIx::Regexp::Structure->_new( @args );
381 push @{ $self->{_rslt}[-1] }, @node;
382 return;
383 }
384
385}
386
387sub _curly {
388 my ( $self, $args ) = @_;
389
390 if ( $args->[-1] && $args->[-1]->is_quantifier() ) {
391
392 # If the tokenizer has marked the right curly as a quantifier,
393 # make the whole thing a quantifier structure.
394 return PPIx::Regexp::Structure::Quantifier->_new( @{ $args } );
395
396 } elsif ( $args->[-1] ) {
397
398 # If there is a right curly but it is not a quantifier,
399 # make both curlys into literals.
400 foreach my $inx ( 0, -1 ) {
401 bless $args->[$inx], TOKEN_LITERAL;
402 }
403
404 # Try to recover possible quantifiers not recognized because we
405 # thought this was a structure.
406 $self->_recover_curly_quantifiers( $args );
407
408 return @{ $args };
409
410 } else {
411
412 # If there is no right curly, just make a generic structure
413 # TODO maybe this should be something else?
414 return PPIx::Regexp::Structure->_new( @{ $args } );
415 }
416}
417
418# Recover from an unclosed left curly.
419sub _recover_curly {
420 my ( $self, $token ) = @_;
421
422 # Get all the stuff we have accumulated for this curly.
423 my @content = @{ pop @{ $self->{_rslt} } };
424
425 # Lose the right bracket, which we have already failed to match.
426 shift @content;
427
428 # Rebless the left curly to a literal.
429 bless $content[0], TOKEN_LITERAL;
430
431 # Try to recover possible quantifiers not recognized because we
432 # thought this was a structure.
433 $self->_recover_curly_quantifiers( \@content );
434
435 # Shove the curly and its putative contents into whatever structure
436 # we have going.
437 # The checks are to try to trap things like RT 56864, though on
438 # further reflection it turned out that you could get here with an
439 # empty $self->{_rslt} on things like 'm{)}'. This one did not get
440 # made into an RT ticket, but was fixed by not calling the recovery
441 # code if $self->{_rslt} contained only the enclosing delimiters.
442 'ARRAY' eq ref $self->{_rslt}
443 or confess 'Programming error - $self->{_rslt} not array ref, ',
444 "parsing '", $self->{tokenizer}->content(), "' at ",
445 $token->content();
446 @{ $self->{_rslt} }
447 or confess 'Programming error - $self->{_rslt} empty, ',
448 "parsing '", $self->{tokenizer}->content(), "' at ",
449 $token->content();
450 push @{ $self->{_rslt}[-1] }, @content;
451
452 # Shove the mismatched delimiter back into the input so we can have
453 # another crack at it.
454 $token and $self->_unget_token( $token );
455
456 # We gone.
457 return;
458}
459
460sub _recover_curly_quantifiers {
461 my ( $self, $args ) = @_;
462
463 if ( __instance( $args->[0], TOKEN_LITERAL )
464 && __instance( $args->[1], TOKEN_UNKNOWN )
465 && PPIx::Regexp::Token::Quantifier->could_be_quantifier(
466 $args->[1]->content() )
467 ) {
468 bless $args->[1], 'PPIx::Regexp::Token::Quantifier';
469
470 if ( __instance( $args->[2], TOKEN_UNKNOWN )
471 && PPIx::Regexp::Token::Greediness->could_be_greediness(
472 $args->[2]->content() )
473 ) {
474 bless $args->[2], 'PPIx::Regexp::Token::Greediness';
475 }
476
477 }
478
479 return;
480}
481
482sub _in_regex_set {
483 my ( $self ) = @_;
484 foreach my $stack_entry ( reverse @{ $self->{_rslt} } ) {
485 $stack_entry->[0] eq '])'
486 and return 1;
487 }
488 return 0;
489}
490
491sub _round {
492 my ( $self, $args ) = @_;
493
494 # If we're inside a regex set, parens do not capture.
495 $self->_in_regex_set()
496 and return PPIx::Regexp::Structure->_new( @{ $args } );
497
498 # The instantiator will rebless based on the first token if need be.
499 return PPIx::Regexp::Structure::Capture->_new( @{ $args } );
500}
501
502sub _square {
503 my ( $self, $args ) = @_;
504 return PPIx::Regexp::Structure::CharClass->_new( @{ $args } );
505}
506
507sub _regex_set {
508 my ( $self, $args ) = @_;
509 return PPIx::Regexp::Structure::RegexSet->_new( @{ $args } );
510}
511
512# $self->_unget_token( $token );
513#
514# This method caches its argument so that it will be returned by
515# the next call to C<_get_token()>. If more than one argument is
516# passed, they will be returned in the order given; that is,
517# _unget_token/_get_token work like unshift/shift.
518
519sub _unget_token {
520 my ( $self, @args ) = @_;
521 unshift @{ $self->{deferred} }, @args;
522 return $self;
523}
524
52516┬Ás1;
526
527__END__