Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Interpolation.pm |
Statements | Executed 20 statements in 1.14ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 17µs | 32µs | BEGIN@32 | PPIx::Regexp::Token::Interpolation::
1 | 1 | 1 | 10µs | 16µs | BEGIN@33 | PPIx::Regexp::Token::Interpolation::
1 | 1 | 1 | 9µs | 86µs | BEGIN@35 | PPIx::Regexp::Token::Interpolation::
1 | 1 | 1 | 8µs | 42µs | BEGIN@38 | PPIx::Regexp::Token::Interpolation::
1 | 1 | 1 | 5µs | 5µs | BEGIN@37 | PPIx::Regexp::Token::Interpolation::
2 | 2 | 1 | 4µs | 4µs | CORE:qr (opcode) | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__regexp | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__repl | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKEN__post_make | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | _curly | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | _interpolation | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | _square | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | _subscript | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | perl_version_introduced | PPIx::Regexp::Token::Interpolation::
0 | 0 | 0 | 0s | 0s | ppi | PPIx::Regexp::Token::Interpolation::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | =head1 NAME | ||||
2 | |||||
3 | PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package. | ||||
4 | |||||
5 | =head1 SYNOPSIS | ||||
6 | |||||
7 | use PPIx::Regexp::Dumper; | ||||
8 | PPIx::Regexp::Dumper->new('qr{$foo}smx')->print(); | ||||
9 | |||||
10 | =head1 INHERITANCE | ||||
11 | |||||
12 | C<PPIx::Regexp::Token::Interpolation> is a | ||||
13 | L<PPIx::Regexp::Token::Code|PPIx::Regexp::Token::Code>. | ||||
14 | |||||
15 | C<PPIx::Regexp::Token::Interpolation> has no descendants. | ||||
16 | |||||
17 | =head1 DESCRIPTION | ||||
18 | |||||
19 | This class represents a variable interpolation into a regular | ||||
20 | expression. In the L</SYNOPSIS> the C<$foo> would be represented by an | ||||
21 | object of this class. | ||||
22 | |||||
23 | =head1 METHODS | ||||
24 | |||||
25 | This class provides no public methods beyond those provided by its | ||||
26 | superclass. | ||||
27 | |||||
28 | =cut | ||||
29 | |||||
30 | package PPIx::Regexp::Token::Interpolation; | ||||
31 | |||||
32 | 2 | 26µs | 2 | 48µs | # spent 32µs (17+16) within PPIx::Regexp::Token::Interpolation::BEGIN@32 which was called:
# once (17µs+16µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 32 # spent 32µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@32
# spent 16µs making 1 call to strict::import |
33 | 2 | 39µs | 2 | 22µs | # spent 16µs (10+6) within PPIx::Regexp::Token::Interpolation::BEGIN@33 which was called:
# once (10µs+6µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 33 # spent 16µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@33
# spent 6µs making 1 call to warnings::import |
34 | |||||
35 | 2 | 27µs | 2 | 162µs | # spent 86µs (9+76) within PPIx::Regexp::Token::Interpolation::BEGIN@35 which was called:
# once (9µs+76µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 35 # spent 86µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@35
# spent 76µs making 1 call to base::import |
36 | |||||
37 | 2 | 25µs | 1 | 5µs | # spent 5µs within PPIx::Regexp::Token::Interpolation::BEGIN@37 which was called:
# once (5µs+0s) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 37 # spent 5µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@37 |
38 | 1 | 200ns | # spent 42µs (8+34) within PPIx::Regexp::Token::Interpolation::BEGIN@38 which was called:
# once (8µs+34µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 40 | ||
39 | COOKIE_CLASS COOKIE_REGEX_SET TOKEN_LITERAL MINIMUM_PERL | ||||
40 | 1 | 992µs | 2 | 77µs | }; # spent 42µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@38
# spent 34µs making 1 call to Exporter::import |
41 | |||||
42 | 1 | 700ns | our $VERSION = '0.036'; | ||
43 | |||||
44 | # Return true if the token can be quantified, and false otherwise | ||||
45 | # This can be quantified because it might interpolate a quantifiable | ||||
46 | # token. Of course, it might not, but we need to be permissive here. | ||||
47 | # sub can_be_quantified { return }; | ||||
48 | |||||
49 | # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not | ||||
50 | # appear until Perl 5.5. But interpolation has been there since the | ||||
51 | # beginning, so we have to override again. This turns out to be OK, | ||||
52 | # though, because while Regex Sets were introduced in 5.17.8, | ||||
53 | # interpolation inside them was not introduced until 5.17.9. | ||||
54 | sub perl_version_introduced { | ||||
55 | my ( $self ) = @_; | ||||
56 | return $self->{perl_version_introduced}; | ||||
57 | } | ||||
58 | |||||
59 | =head2 ppi | ||||
60 | |||||
61 | This convenience method returns the L<PPI::Document|PPI::Document> | ||||
62 | representing the content. This document should be considered read only. | ||||
63 | |||||
64 | Note that the content of the returned L<PPI::Document|PPI::Document> may | ||||
65 | not be the same as the content of the original | ||||
66 | C<PPIx::Regexp::Token::Interpolation>. This can happen because | ||||
67 | interpolated variable names may be enclosed in curly brackets, but this | ||||
68 | does not happen in normal code. For example, in C</${foo}bar/>, the | ||||
69 | content of the C<PPIx::Regexp::Token::Interpolation> object will be | ||||
70 | C<'${foo}'>, but the content of the C<PPI::Document> will be C<'$foo'>. | ||||
71 | |||||
72 | =cut | ||||
73 | |||||
74 | sub ppi { | ||||
75 | my ( $self ) = @_; | ||||
76 | if ( exists $self->{ppi} ) { | ||||
77 | return $self->{ppi}; | ||||
78 | } elsif ( exists $self->{content} ) { | ||||
79 | ( my $code = $self->{content} ) =~ | ||||
80 | s/ \A ( [\@\$] ) [{] ( .* ) [}] \z /$1$2/smx; | ||||
81 | return ( $self->{ppi} = PPI::Document->new( | ||||
82 | \$code, readonly => 1 ) ); | ||||
83 | } else { | ||||
84 | return; | ||||
85 | } | ||||
86 | } | ||||
87 | |||||
88 | |||||
89 | # Match the beginning of an interpolation. | ||||
90 | |||||
91 | 1 | 10µs | 1 | 2µs | my $interp_re = # spent 2µs making 1 call to PPIx::Regexp::Token::Interpolation::CORE:qr |
92 | qr{ \A (?: [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] | | ||||
93 | \@ [\w\{] ) | ||||
94 | }smx; | ||||
95 | |||||
96 | # Match bracketed interpolation | ||||
97 | |||||
98 | 1 | 5µs | 1 | 2µs | my $brkt_interp_re = # spent 2µs making 1 call to PPIx::Regexp::Token::Interpolation::CORE:qr |
99 | qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] | | ||||
100 | \^? \w+ (?: :: \w+ )* ) [\}] | | ||||
101 | \@ [\{] \w+ (?: :: \w+ )* [\}] ) | ||||
102 | }smx; | ||||
103 | |||||
104 | # We pull out the logic of finding and dealing with the interpolation | ||||
105 | # into a separate subroutine because if we fail to find an interpolation | ||||
106 | # we want to do something with the sigils. | ||||
107 | |||||
108 | 1 | 2µs | my %allow_subscript_based_on_cast_symbol = ( | ||
109 | q<$#> => 0, | ||||
110 | q<$> => 1, | ||||
111 | q<@> => 1, | ||||
112 | ); | ||||
113 | |||||
114 | sub _interpolation { | ||||
115 | my ( $class, $tokenizer, $character, $in_regexp ) = @_; | ||||
116 | |||||
117 | # If the regexp does not interpolate, bail now. | ||||
118 | $tokenizer->interpolates() or return; | ||||
119 | |||||
120 | # If we're a bracketed interpolation, just accept it | ||||
121 | if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) { | ||||
122 | return $len; | ||||
123 | } | ||||
124 | |||||
125 | # Make sure we start off plausibly | ||||
126 | $tokenizer->find_regexp( $interp_re ) | ||||
127 | or return; | ||||
128 | |||||
129 | # See if PPI can figure out what we have | ||||
130 | my $doc = $tokenizer->ppi_document() | ||||
131 | or return; | ||||
132 | |||||
133 | # Get the first statement to work on. | ||||
134 | my $stmt = $doc->find_first( 'PPI::Statement' ) | ||||
135 | or return; | ||||
136 | |||||
137 | my @accum; # The elements of the interpolation | ||||
138 | my $allow_subscript; # Assume no subscripts allowed | ||||
139 | |||||
140 | # Find the beginning of the interpolation | ||||
141 | my $next = $stmt->schild( 0 ) or return; | ||||
142 | |||||
143 | # The interpolation should start with | ||||
144 | if ( $next->isa( 'PPI::Token::Symbol' ) ) { | ||||
145 | |||||
146 | # A symbol | ||||
147 | push @accum, $next; | ||||
148 | $allow_subscript = 1; # Subscripts are allowed | ||||
149 | |||||
150 | } elsif ( $next->isa( 'PPI::Token::Cast' ) ) { | ||||
151 | |||||
152 | # Or a cast followed by a block | ||||
153 | push @accum, $next; | ||||
154 | $next = $next->next_sibling() or return; | ||||
155 | if ( $next->isa( 'PPI::Token::Symbol' ) ) { | ||||
156 | defined ( | ||||
157 | $allow_subscript = | ||||
158 | $allow_subscript_based_on_cast_symbol{ | ||||
159 | $accum[-1]->content() | ||||
160 | } | ||||
161 | ) or return; | ||||
162 | push @accum, $next; | ||||
163 | } elsif ( $next->isa( 'PPI::Structure::Block' ) ) { | ||||
164 | |||||
165 | =begin comment | ||||
166 | |||||
167 | local $_ = $next->content(); | ||||
168 | if ( m< \A { / } >smx ) { | ||||
169 | push @accum, 3; # Number of characters to accept. | ||||
170 | } else { | ||||
171 | ## $allow_subscript = $accum[-1]->content() ne '$#'; | ||||
172 | push @accum, $next; | ||||
173 | } | ||||
174 | |||||
175 | =end comment | ||||
176 | |||||
177 | =cut | ||||
178 | |||||
179 | push @accum, $next; | ||||
180 | } else { | ||||
181 | return; | ||||
182 | } | ||||
183 | |||||
184 | } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) { | ||||
185 | |||||
186 | # Or an array index | ||||
187 | push @accum, $next; | ||||
188 | |||||
189 | } else { | ||||
190 | |||||
191 | # None others need apply. | ||||
192 | return; | ||||
193 | |||||
194 | } | ||||
195 | |||||
196 | # The interpolation _may_ be subscripted. If so ... | ||||
197 | { | ||||
198 | |||||
199 | # Only accept a subscript if wanted and available | ||||
200 | $allow_subscript and $next = $next->snext_sibling() or last; | ||||
201 | |||||
202 | # Accept an optional dereference operator. | ||||
203 | my @subscr; | ||||
204 | if ( $next->isa( 'PPI::Token::Operator' ) ) { | ||||
205 | $next->content() eq '->' or last; | ||||
206 | push @subscr, $next; | ||||
207 | $next = $next->next_sibling() or last; | ||||
208 | } | ||||
209 | |||||
210 | # Accept only a subscript | ||||
211 | $next->isa( 'PPI::Structure::Subscript' ) or last; | ||||
212 | |||||
213 | # The subscript must have a closing delimiter. | ||||
214 | $next->finish() or last; | ||||
215 | |||||
216 | # If we are in a regular expression rather than a replacement | ||||
217 | # string, screen the subscript for content, since [] could be a | ||||
218 | # character class, and {} could be a quantifier. The perlop docs | ||||
219 | # say that Perl applies undocumented heuristics subject to | ||||
220 | # change without notice to figure this out. So we do our poor | ||||
221 | # best to be heuristical and undocumented. | ||||
222 | not $in_regexp or $class->_subscript( $next ) or last; | ||||
223 | |||||
224 | # If we got this far, accept the subscript and try for another | ||||
225 | # one. | ||||
226 | push @accum, @subscr, $next; | ||||
227 | redo; | ||||
228 | } | ||||
229 | |||||
230 | # Compute the length of all the PPI elements accumulated, and return | ||||
231 | # it. | ||||
232 | my $length = 0; | ||||
233 | foreach ( @accum ) { | ||||
234 | $length += ref $_ ? length $_->content() : $_; | ||||
235 | } | ||||
236 | return $length; | ||||
237 | } | ||||
238 | |||||
239 | { | ||||
240 | |||||
241 | 2 | 2µs | my %allowed = ( | ||
242 | '[' => '_square', | ||||
243 | '{' => '_curly', | ||||
244 | ); | ||||
245 | |||||
246 | sub _subscript { | ||||
247 | my ( $class, $struct ) = @_; | ||||
248 | |||||
249 | # We expect to have a left delimiter, which is either a '[' or a | ||||
250 | # '{'. | ||||
251 | my $left = $struct->start() or return; | ||||
252 | my $lc = $left->content(); | ||||
253 | my $handler = $allowed{$lc} or return; | ||||
254 | |||||
255 | # We expect a single child, which is a PPI::Statement | ||||
256 | ( my @kids = $struct->schildren() ) == 1 or return; | ||||
257 | $kids[0]->isa( 'PPI::Statement' ) or return; | ||||
258 | |||||
259 | # We expect the statement to have at least one child. | ||||
260 | ( @kids = $kids[0]->schildren() ) or return; | ||||
261 | |||||
262 | return $class->$handler( @kids ); | ||||
263 | |||||
264 | } | ||||
265 | |||||
266 | } | ||||
267 | |||||
268 | # Return true if we think a curly-bracketed subscript is really a | ||||
269 | # subscript, rather than a quantifier. | ||||
270 | sub _curly { | ||||
271 | my ( $class, @kids ) = @_; | ||||
272 | |||||
273 | # If the first child is a word, and either it is an only child or | ||||
274 | # the next child is the fat comma operator, we accept it as a | ||||
275 | # subscript. | ||||
276 | if ( $kids[0]->isa( 'PPI::Token::Word' ) ) { | ||||
277 | @kids == 1 and return 1; | ||||
278 | $kids[1]->isa( 'PPI::Token::Operator' ) | ||||
279 | and $kids[1]->content() eq '=>' | ||||
280 | and return 1; | ||||
281 | } | ||||
282 | |||||
283 | # If we have exactly one child which is a symbol, we accept it as a | ||||
284 | # subscript. | ||||
285 | @kids == 1 | ||||
286 | and $kids[0]->isa( 'PPI::Token::Symbol' ) | ||||
287 | and return 1; | ||||
288 | |||||
289 | # We reject anything else. | ||||
290 | return; | ||||
291 | } | ||||
292 | |||||
293 | # Return true if we think a square-bracketed subscript is really a | ||||
294 | # subscript, rather than a character class. | ||||
295 | sub _square { | ||||
296 | my ( $class, @kids ) = @_; | ||||
297 | |||||
298 | # We expect to have either a number or a symbol as the first | ||||
299 | # element. | ||||
300 | $kids[0]->isa( 'PPI::Token::Number' ) and return 1; | ||||
301 | $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1; | ||||
302 | |||||
303 | # Anything else is rejected. | ||||
304 | return; | ||||
305 | } | ||||
306 | |||||
307 | { | ||||
308 | |||||
309 | 2 | 800ns | my %default = ( | ||
310 | perl_version_introduced => MINIMUM_PERL, | ||||
311 | ); | ||||
312 | |||||
313 | sub __PPIX_TOKEN__post_make { | ||||
314 | my ( $self, $tokenizer, $arg ) = @_; | ||||
315 | |||||
316 | # If we're manufacturing objects directly (which is UNSUPPORTED, | ||||
317 | # but used in t/version.t) we may not have a $tokenizer. | ||||
318 | $tokenizer | ||||
319 | and $tokenizer->cookie( COOKIE_REGEX_SET ) | ||||
320 | and $self->{perl_version_introduced} = '5.017009'; | ||||
321 | |||||
322 | $self->__impose_defaults( $arg, \%default ); | ||||
323 | |||||
324 | return; | ||||
325 | } | ||||
326 | |||||
327 | } | ||||
328 | |||||
329 | # Alternate classes for the sigils, depending on whether we are in a | ||||
330 | # character class (index 1) or not (index 0). | ||||
331 | 1 | 2µs | my %sigil_alternate = ( | ||
332 | '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ], | ||||
333 | '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ], | ||||
334 | ); | ||||
335 | |||||
336 | sub __PPIX_TOKENIZER__regexp { | ||||
337 | my ( $class, $tokenizer, $character ) = @_; | ||||
338 | |||||
339 | exists $sigil_alternate{$character} or return; | ||||
340 | |||||
341 | if ( my $accept = _interpolation( $class, $tokenizer, $character, 1 ) ) { | ||||
342 | return $accept; | ||||
343 | } | ||||
344 | |||||
345 | my $alternate = $sigil_alternate{$character} or return; | ||||
346 | return $tokenizer->make_token( | ||||
347 | 1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] ); | ||||
348 | |||||
349 | } | ||||
350 | |||||
351 | sub __PPIX_TOKENIZER__repl { | ||||
352 | my ( $class, $tokenizer, $character ) = @_; | ||||
353 | |||||
354 | exists $sigil_alternate{$character} or return; | ||||
355 | |||||
356 | if ( my $accept = _interpolation( $class, $tokenizer, $character, 0 ) ) { | ||||
357 | return $accept; | ||||
358 | } | ||||
359 | |||||
360 | return $tokenizer->make_token( 1, TOKEN_LITERAL ); | ||||
361 | |||||
362 | } | ||||
363 | |||||
364 | 1 | 7µs | 1; | ||
365 | |||||
366 | __END__ | ||||
sub PPIx::Regexp::Token::Interpolation::CORE:qr; # opcode |