← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:14 2016

Filename: /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Structure.pm
Statements: Executed 31 statements in 1.57ms
Subroutines
Calls  P  F  Exclusive Time  Inclusive Time  Subroutine
1      1  1  1.60ms          1.80ms          PPIx::Regexp::Token::Structure::BEGIN@51
1      1  1  403µs           655µs           PPIx::Regexp::Token::Structure::BEGIN@54
1      1  1  19µs            39µs            PPIx::Regexp::Token::Structure::BEGIN@35
1      1  1  13µs            20µs            PPIx::Regexp::Token::Structure::BEGIN@36
1      1  1  11µs            96µs            PPIx::Regexp::Token::Structure::BEGIN@38
1      1  1  11µs            66µs            PPIx::Regexp::Token::Structure::BEGIN@40
1      1  1  7µs             7µs             PPIx::Regexp::Token::Structure::BEGIN@52
1      1  1  5µs             5µs             PPIx::Regexp::Token::Structure::BEGIN@50
1      1  1  5µs             5µs             PPIx::Regexp::Token::Structure::BEGIN@53
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::__ANON__[:152]
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::__ANON__[:220]
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::__ANON__[:245]
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::__PPIX_LEXER__finalize
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::__PPIX_TOKENIZER__regexp
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::can_be_quantified
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::is_quantifier
0      0  0  0s              0s              PPIx::Regexp::Token::Structure::perl_version_introduced
Call graph for these subroutines as a Graphviz dot language file.
Line | Statements | Time on line | Calls | Time in subs | Code
1=head1 NAME
2
3PPIx::Regexp::Token::Structure - Represent structural elements.
4
5=head1 SYNOPSIS
6
7 use PPIx::Regexp::Dumper;
8 PPIx::Regexp::Dumper->new( 'qr{(foo)}smx' )
9 ->print();
10
11=head1 INHERITANCE
12
13C<PPIx::Regexp::Token::Structure> is a
14L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15
16C<PPIx::Regexp::Token::Structure> is the parent of
17L<PPIx::Regexp::Token::Delimiter|PPIx::Regexp::Token::Delimiter>.
18
19=head1 DESCRIPTION
20
21This class represents things that define the structure of the regular
22expression. This typically means brackets of various sorts, but to
23prevent proliferation of token classes the type of the regular
24expression is stored here.
25
26=head1 METHODS
27
28This class provides no public methods beyond those provided by its
29superclass.
30
31=cut
32
33package PPIx::Regexp::Token::Structure;
34
35233µs258µs
# spent 39µs (19+19) within PPIx::Regexp::Token::Structure::BEGIN@35 which was called: # once (19µs+19µs) by base::import at line 35
use strict;
# spent 39µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@35 # spent 19µs making 1 call to strict::import
36233µs227µs
# spent 20µs (13+7) within PPIx::Regexp::Token::Structure::BEGIN@36 which was called: # once (13µs+7µs) by base::import at line 36
use warnings;
# spent 20µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@36 # spent 7µs making 1 call to warnings::import
37
38243µs296µs
# spent 96µs (11+85) within PPIx::Regexp::Token::Structure::BEGIN@38 which was called: # once (11µs+85µs) by base::import at line 38
use base qw{ PPIx::Regexp::Token };
# spent 96µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@38 # spent 85µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 85µs
39
401400ns
# spent 66µs (11+55) within PPIx::Regexp::Token::Structure::BEGIN@40 which was called: # once (11µs+55µs) by base::import at line 46
use PPIx::Regexp::Constant qw{
41 COOKIE_CLASS
42 COOKIE_QUANT
43 COOKIE_REGEX_SET
44 MINIMUM_PERL
45 TOKEN_LITERAL
46134µs2121µs};
# spent 66µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@40 # spent 55µs making 1 call to Exporter::import
47
48# Tokens we are responsible for making, under at least some
49# circumstances.
50226µs15µs
# spent 5µs within PPIx::Regexp::Token::Structure::BEGIN@50 which was called: # once (5µs+0s) by base::import at line 50
use PPIx::Regexp::Token::Comment ();
# spent 5µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@50
512161µs11.80ms
# spent 1.80ms (1.60+202µs) within PPIx::Regexp::Token::Structure::BEGIN@51 which was called: # once (1.60ms+202µs) by base::import at line 51
use PPIx::Regexp::Token::Modifier ();
# spent 1.80ms making 1 call to PPIx::Regexp::Token::Structure::BEGIN@51
52228µs17µs
# spent 7µs within PPIx::Regexp::Token::Structure::BEGIN@52 which was called: # once (7µs+0s) by base::import at line 52
use PPIx::Regexp::Token::Backreference ();
# spent 7µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@52
53226µs15µs
# spent 5µs within PPIx::Regexp::Token::Structure::BEGIN@53 which was called: # once (5µs+0s) by base::import at line 53
use PPIx::Regexp::Token::Backtrack ();
# spent 5µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@53
5421.14ms1655µs
# spent 655µs (403+252) within PPIx::Regexp::Token::Structure::BEGIN@54 which was called: # once (403µs+252µs) by base::import at line 54
use PPIx::Regexp::Token::Recursion ();
# spent 655µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@54
55
561800nsour $VERSION = '0.036';
57
58# Return true if the token can be quantified, and false otherwise.
59# Only tokens whose content is ')' or ']' qualify; a call on a non-reference invocant (class-method call) returns nothing.
6014µsmy %quant = map { $_ => 1 } ')', ']';
61sub can_be_quantified {
62 my ( $self ) = @_;
63 ref $self or return;
64 return $quant{ $self->content() };
65};
66# Return the is_quantifier flag stored in the token object (the tokenizer sets it on a '}' that closes a quantifier); a call on a non-reference invocant returns nothing.
67sub is_quantifier {
68 my ( $self ) = @_;
69 ref $self or return;
70 return $self->{is_quantifier};
71}
72
73{
74
75 # Note that the implementation equivocates on the ::Token::Structure
76 # class, using it both for the initial token that determines the
77 # type of the regex and things like parentheses internal to the
78 # regex. Rather than sort out this equivocation, I have relied on
79 # the currently-true assumption that 'qr' will not satisfy the
80 # ::Token::Structure recognition logic, and the only way this class
81 # can acquire this content is by the brute-force approach used to
82 # generate the initial token object.
83 # Map of token content to the Perl version that introduced it.
8423µs my %perl_version_introduced = (
85 qr => '5.005',
86 '(?[' => '5.017008',
87 );
88 # Return the Perl version that introduced this token's content, falling back to MINIMUM_PERL when the content is not listed in the map above.
89 sub perl_version_introduced {
90 my ( $self ) = @_;
91 return $perl_version_introduced{ $self->content() } || MINIMUM_PERL;
92 }
93}
94
95{
96 # The only characters the tokenizer hook in this scope acts on; anything else is rejected immediately.
9725µs my %delim = map { $_ => 1 } qw/ ( ) { } [ ] /;
98
99 # Regular expressions to match various parenthesized tokens, and the
100 # classes to make them into.
101 # Each entry is [ class name, whatever that class's __PPIX_TOKEN__recognize() returns ]; the consumer in this scope unpacks the latter as a list of [ regexp, argument ] pairs.
102515µs5106µs my @paren_token = map {
10315µs [ $_ => $_->__PPIX_TOKEN__recognize() ]
104 }
105 'PPIx::Regexp::Token::Comment',
106 'PPIx::Regexp::Token::Modifier',
107 'PPIx::Regexp::Token::Backreference',
108 'PPIx::Regexp::Token::Backtrack',
109 'PPIx::Regexp::Token::Recursion',
110 ;
111 # Tokenizer hook. Given the tokenizer and the current character, returns nothing unless the character is a delimiter; otherwise returns either a value to accept (1 or a find_regexp() result) or the token(s) already built with make_token().
112 sub __PPIX_TOKENIZER__regexp {
113 my ( $class, $tokenizer, $character ) = @_;
114
115 # We are not interested in anything but delimiters.
116 $delim{$character} or return;
117
118 # Inside a character class, all the delimiters are normal characters
119 # except for the close square bracket.
120 if ( $tokenizer->cookie( COOKIE_CLASS ) ) {
121 $character eq ']'
122 or return $tokenizer->make_token( 1, TOKEN_LITERAL );
123 }
124
125 # Open parentheses have various interesting possibilities ...
126 if ( $character eq '(' ) {
127
128 # Sometimes the whole bunch of parenthesized characters seems
129 # naturally to be a token.
130 foreach ( @paren_token ) {
131 my ( $class, @recognize ) = @{ $_ };
132 foreach ( @recognize ) {
133 my ( $regexp, $arg ) = @{ $_ };
134 my $accept = $tokenizer->find_regexp( $regexp ) or next;
135 return $tokenizer->make_token( $accept, $class, $arg );
136 }
137 }
138
139 # Modifier changes are local to this parenthesis group
140 $tokenizer->modifier_duplicate();
141
142 # The regex-set functionality introduced with 5.17.8 is most
143 # conveniently handled by treating the initial '(?[' and
144 # final '])' as ::Structure tokens. Fortunately for us,
145 # perl5178delta documents that these may not have interior
146 # spaces.
147
148 if ( my $accept = $tokenizer->find_regexp(
149 qr{ \A [(] [?] [[] }smx # ] ) - help for vim
150 )
151 ) {
152 $tokenizer->cookie( COOKIE_REGEX_SET, sub { return 1 } );
153 $tokenizer->modifier_modify( x => 1 ); # Implicitly /x
154 return $accept;
155 }
156
157 # We expect certain tokens only after a left paren.
158 $tokenizer->expect(
159 'PPIx::Regexp::Token::GroupType::Modifier',
160 'PPIx::Regexp::Token::GroupType::NamedCapture',
161 'PPIx::Regexp::Token::GroupType::Assertion',
162 'PPIx::Regexp::Token::GroupType::Code',
163 'PPIx::Regexp::Token::GroupType::BranchReset',
164 'PPIx::Regexp::Token::GroupType::Subexpression',
165 'PPIx::Regexp::Token::GroupType::Switch',
166 );
167
168 # Accept the parenthesis.
169 return 1;
170 }
171
172 # Close parentheses end modifier localization
173 if ( $character eq ')' ) {
174 $tokenizer->modifier_pop();
175 return 1;
176 }
177
178 # Open curlys are complicated because they may or may not represent
179 # the beginning of a quantifier, depending on what comes before the
180 # close curly. So we set a cookie to monitor the token stream for
181 # interlopers. If all goes well, the right curly will find the
182 # cookie and know it is supposed to be a quantifier.
183 if ( $character eq '{' ) {
184
185 # If the prior token cannot be quantified, all this is
186 # unnecessary.
187 $tokenizer->prior( 'can_be_quantified' )
188 or return 1;
189
190 # We make our token now, before setting the cookie. Otherwise
191 # the cookie has to deal with this token.
192 my $token = $tokenizer->make_token( 1 );
193
194 # A cookie for the next '}'.
195 my $commas = 0;
196 $tokenizer->cookie( COOKIE_QUANT, sub {
197 my ( $tokenizer, $token ) = @_;
198 $token or return 1;
199
200 # Of literals, we accept at most one comma provided it
201 # is not immediately after a '{'. We also accept
202 # anything that matches '\d'.
203 if ( $token->isa( TOKEN_LITERAL ) ) {
204 my $character = $token->content();
205 if ( $character eq ',' ) {
206 $commas++ and return;
207 return $tokenizer->prior( 'content' ) ne '{';
208 }
209 return $character =~ m/ \A \d \z /smx;
210 }
211
212 # Since we do not know what is in an interpolation, we
213 # trustingly accept it.
214 if ( $token->isa( 'PPIx::Regexp::Token::Interpolation' )
215 ) {
216 return 1;
217 }
218
219 return;
220 },
221 );
222
223 return $token;
224 }
225
226 # The close curly bracket is a little complicated because if the
227 # cookie posted by the left curly bracket is still around, we are a
228 # quantifier, otherwise not. We are also not a quantifier when the prior token is itself a ::Structure (presumably the '{' itself, i.e. an empty '{}' -- confirm).
229 if ( $character eq '}' ) {
230 $tokenizer->cookie( COOKIE_QUANT, undef )
231 or return 1;
232 $tokenizer->prior( 'class' )->isa( __PACKAGE__ )
233 and return 1;
234 my $token = $tokenizer->make_token( 1 );
235 $token->{is_quantifier} = 1;
236 return $token;
237 }
238
239 # The parse rules are different inside a character class, so we set
240 # another cookie. Sigh. If your tool is a hammer ...
241 if ( $character eq '[' ) {
242
243 # Set our cookie. Since it always returns 1, it does not matter
244 # where in the following mess we set it.
245 $tokenizer->cookie( COOKIE_CLASS, sub { return 1 } );
246
247 # Make our token now, since the easiest place to deal with the
248 # beginning-of-character-class strangeness seems to be right
249 # here.
250 my @tokens = $tokenizer->make_token( 1 );
251
252 # Get the next character, returning tokens if there is none.
253 defined ( $character = $tokenizer->peek() )
254 or return @tokens;
255
256 # If we have a caret, it is a negation operator. Make its token
257 # and fetch the next character, returning if none.
258 if ( $character eq '^' ) {
259 push @tokens, $tokenizer->make_token(
260 1, 'PPIx::Regexp::Token::Operator' );
261 defined ( $character = $tokenizer->peek() )
262 or return @tokens;
263 }
264
265 # If we have a close square at this point, it is not the end of
266 # the class, but just a literal. Make its token.
267 $character eq ']'
268 and push @tokens, $tokenizer->make_token( 1, TOKEN_LITERAL );
269
270 # Return all tokens made.
271 return @tokens;
272 }
273 # per perlop, the metas inside a [] are -]\^$.
274 # per perlop, the metas outside a [] are {}[]()^$.|*+?\
275 # The difference is that {}[().|*+? are not metas in [], but - is.
276
277 # Close bracket is complicated by the addition of regex sets.
278 # And more complicated by the fact that you can have an
279 # old-style character class inside a regex set. Fortunately they
280 # have not (yet!) permitted nested regex sets.
281 if ( $character eq ']' ) {
282
283 # If we find '])' and COOKIE_REGEX_SET is present, we have a
284 # regex set. We need to delete the cookie and accept both
285 # characters.
286 if ( ( my $accept = $tokenizer->find_regexp(
287 # help vim - ( [
288 qr{ \A []] [)] }smx
289 ) )
290 && $tokenizer->cookie( COOKIE_REGEX_SET )
291
292 ) {
293 $tokenizer->cookie( COOKIE_REGEX_SET, undef );
294 return $accept;
295 }
296
297 # Otherwise we assume we're in a bracketed character class,
298 # delete the cookie, and accept the close bracket.
299 $tokenizer->cookie( COOKIE_CLASS, undef );
300 return 1;
301 }
302
303 return 1;
304 }
305
306}
307
308# Called by the lexer once it has done its worst to all the tokens.
309# Called as a method with no arguments. The return is the number of
310# parse failures discovered when finalizing. This implementation only deletes the object's is_quantifier key and always returns 0.
311sub __PPIX_LEXER__finalize {
312 my ( $self ) = @_;
313 delete $self->{is_quantifier};
314 return 0;
315}
316
317110µs1;
318
319__END__