| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Interpolation.pm |
| Statements | Executed 20 statements in 1.14ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 17µs | 32µs | PPIx::Regexp::Token::Interpolation::BEGIN@32 |
| 1 | 1 | 1 | 10µs | 16µs | PPIx::Regexp::Token::Interpolation::BEGIN@33 |
| 1 | 1 | 1 | 9µs | 86µs | PPIx::Regexp::Token::Interpolation::BEGIN@35 |
| 1 | 1 | 1 | 8µs | 42µs | PPIx::Regexp::Token::Interpolation::BEGIN@38 |
| 1 | 1 | 1 | 5µs | 5µs | PPIx::Regexp::Token::Interpolation::BEGIN@37 |
| 2 | 2 | 1 | 4µs | 4µs | PPIx::Regexp::Token::Interpolation::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::__PPIX_TOKENIZER__regexp |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::__PPIX_TOKENIZER__repl |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::__PPIX_TOKEN__post_make |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::_curly |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::_interpolation |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::_square |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::_subscript |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::perl_version_introduced |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Interpolation::ppi |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | =head1 NAME | ||||
| 2 | |||||
| 3 | PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package. | ||||
| 4 | |||||
| 5 | =head1 SYNOPSIS | ||||
| 6 | |||||
| 7 | use PPIx::Regexp::Dumper; | ||||
| 8 | PPIx::Regexp::Dumper->new('qr{$foo}smx')->print(); | ||||
| 9 | |||||
| 10 | =head1 INHERITANCE | ||||
| 11 | |||||
| 12 | C<PPIx::Regexp::Token::Interpolation> is a | ||||
| 13 | L<PPIx::Regexp::Token::Code|PPIx::Regexp::Token::Code>. | ||||
| 14 | |||||
| 15 | C<PPIx::Regexp::Token::Interpolation> has no descendants. | ||||
| 16 | |||||
| 17 | =head1 DESCRIPTION | ||||
| 18 | |||||
| 19 | This class represents a variable interpolation into a regular | ||||
| 20 | expression. In the L</SYNOPSIS> the C<$foo> would be represented by an | ||||
| 21 | object of this class. | ||||
| 22 | |||||
| 23 | =head1 METHODS | ||||
| 24 | |||||
| 25 | This class provides no public methods beyond those provided by its | ||||
| 26 | superclass. | ||||
| 27 | |||||
| 28 | =cut | ||||
| 29 | |||||
| 30 | package PPIx::Regexp::Token::Interpolation; | ||||
| 31 | |||||
| 32 | 2 | 26µs | 2 | 48µs | # spent 32µs (17+16) within PPIx::Regexp::Token::Interpolation::BEGIN@32 which was called:
# once (17µs+16µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 32 # spent 32µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@32
# spent 16µs making 1 call to strict::import |
| 33 | 2 | 39µs | 2 | 22µs | # spent 16µs (10+6) within PPIx::Regexp::Token::Interpolation::BEGIN@33 which was called:
# once (10µs+6µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 33 # spent 16µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@33
# spent 6µs making 1 call to warnings::import |
| 34 | |||||
| 35 | 2 | 27µs | 2 | 162µs | # spent 86µs (9+76) within PPIx::Regexp::Token::Interpolation::BEGIN@35 which was called:
# once (9µs+76µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 35 # spent 86µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@35
# spent 76µs making 1 call to base::import |
| 36 | |||||
| 37 | 2 | 25µs | 1 | 5µs | # spent 5µs within PPIx::Regexp::Token::Interpolation::BEGIN@37 which was called:
# once (5µs+0s) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 37 # spent 5µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@37 |
| 38 | 1 | 200ns | # spent 42µs (8+34) within PPIx::Regexp::Token::Interpolation::BEGIN@38 which was called:
# once (8µs+34µs) by PPIx::Regexp::Tokenizer::BEGIN@33 at line 40 | ||
| 39 | COOKIE_CLASS COOKIE_REGEX_SET TOKEN_LITERAL MINIMUM_PERL | ||||
| 40 | 1 | 992µs | 2 | 77µs | }; # spent 42µs making 1 call to PPIx::Regexp::Token::Interpolation::BEGIN@38
# spent 34µs making 1 call to Exporter::import |
| 41 | |||||
| 42 | 1 | 700ns | our $VERSION = '0.036'; | ||
| 43 | |||||
| 44 | # Return true if the token can be quantified, and false otherwise | ||||
| 45 | # This can be quantified because it might interpolate a quantifiable | ||||
| 46 | # token. Of course, it might not, but we need to be permissive here. | ||||
| 47 | # sub can_be_quantified { return }; | ||||
| 48 | |||||
| 49 | # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not | ||||
| 50 | # appear until Perl 5.5. But interpolation has been there since the | ||||
| 51 | # beginning, so we have to override again. This turns out to be OK, | ||||
| 52 | # though, because while Regex Sets were introduced in 5.17.8, | ||||
| 53 | # interpolation inside them was not introduced until 5.17.9. | ||||
| 54 | sub perl_version_introduced { | ||||
| 55 | my ( $self ) = @_; | ||||
| 56 | return $self->{perl_version_introduced}; | ||||
| 57 | } | ||||
| 58 | |||||
| 59 | =head2 ppi | ||||
| 60 | |||||
| 61 | This convenience method returns the L<PPI::Document|PPI::Document> | ||||
| 62 | representing the content. This document should be considered read only. | ||||
| 63 | |||||
| 64 | Note that the content of the returned L<PPI::Document|PPI::Document> may | ||||
| 65 | not be the same as the content of the original | ||||
| 66 | C<PPIx::Regexp::Token::Interpolation>. This can happen because | ||||
| 67 | interpolated variable names may be enclosed in curly brackets, but this | ||||
| 68 | does not happen in normal code. For example, in C</${foo}bar/>, the | ||||
| 69 | content of the C<PPIx::Regexp::Token::Interpolation> object will be | ||||
| 70 | C<'${foo}'>, but the content of the C<PPI::Document> will be C<'$foo'>. | ||||
| 71 | |||||
| 72 | =cut | ||||
| 73 | |||||
| 74 | sub ppi { | ||||
| 75 | my ( $self ) = @_; | ||||
| 76 | if ( exists $self->{ppi} ) { | ||||
| 77 | return $self->{ppi}; | ||||
| 78 | } elsif ( exists $self->{content} ) { | ||||
| 79 | ( my $code = $self->{content} ) =~ | ||||
| 80 | s/ \A ( [\@\$] ) [{] ( .* ) [}] \z /$1$2/smx; | ||||
| 81 | return ( $self->{ppi} = PPI::Document->new( | ||||
| 82 | \$code, readonly => 1 ) ); | ||||
| 83 | } else { | ||||
| 84 | return; | ||||
| 85 | } | ||||
| 86 | } | ||||
| 87 | |||||
| 88 | |||||
| 89 | # Match the beginning of an interpolation. | ||||
| 90 | |||||
| 91 | 1 | 10µs | 1 | 2µs | my $interp_re = # spent 2µs making 1 call to PPIx::Regexp::Token::Interpolation::CORE:qr |
| 92 | qr{ \A (?: [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] | | ||||
| 93 | \@ [\w\{] ) | ||||
| 94 | }smx; | ||||
| 95 | |||||
| 96 | # Match bracketed interpolation | ||||
| 97 | |||||
| 98 | 1 | 5µs | 1 | 2µs | my $brkt_interp_re = # spent 2µs making 1 call to PPIx::Regexp::Token::Interpolation::CORE:qr |
| 99 | qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] | | ||||
| 100 | \^? \w+ (?: :: \w+ )* ) [\}] | | ||||
| 101 | \@ [\{] \w+ (?: :: \w+ )* [\}] ) | ||||
| 102 | }smx; | ||||
| 103 | |||||
| 104 | # We pull out the logic of finding and dealing with the interpolation | ||||
| 105 | # into a separate subroutine because if we fail to find an interpolation | ||||
| 106 | # we want to do something with the sigils. | ||||
| 107 | |||||
| 108 | 1 | 2µs | my %allow_subscript_based_on_cast_symbol = ( | ||
| 109 | q<$#> => 0, | ||||
| 110 | q<$> => 1, | ||||
| 111 | q<@> => 1, | ||||
| 112 | ); | ||||
| 113 | |||||
| 114 | sub _interpolation { | ||||
| 115 | my ( $class, $tokenizer, $character, $in_regexp ) = @_; | ||||
| 116 | |||||
| 117 | # If the regexp does not interpolate, bail now. | ||||
| 118 | $tokenizer->interpolates() or return; | ||||
| 119 | |||||
| 120 | # If we're a bracketed interpolation, just accept it | ||||
| 121 | if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) { | ||||
| 122 | return $len; | ||||
| 123 | } | ||||
| 124 | |||||
| 125 | # Make sure we start off plausibly | ||||
| 126 | $tokenizer->find_regexp( $interp_re ) | ||||
| 127 | or return; | ||||
| 128 | |||||
| 129 | # See if PPI can figure out what we have | ||||
| 130 | my $doc = $tokenizer->ppi_document() | ||||
| 131 | or return; | ||||
| 132 | |||||
| 133 | # Get the first statement to work on. | ||||
| 134 | my $stmt = $doc->find_first( 'PPI::Statement' ) | ||||
| 135 | or return; | ||||
| 136 | |||||
| 137 | my @accum; # The elements of the interpolation | ||||
| 138 | my $allow_subscript; # Assume no subscripts allowed | ||||
| 139 | |||||
| 140 | # Find the beginning of the interpolation | ||||
| 141 | my $next = $stmt->schild( 0 ) or return; | ||||
| 142 | |||||
| 143 | # The interpolation should start with | ||||
| 144 | if ( $next->isa( 'PPI::Token::Symbol' ) ) { | ||||
| 145 | |||||
| 146 | # A symbol | ||||
| 147 | push @accum, $next; | ||||
| 148 | $allow_subscript = 1; # Subscripts are allowed | ||||
| 149 | |||||
| 150 | } elsif ( $next->isa( 'PPI::Token::Cast' ) ) { | ||||
| 151 | |||||
| 152 | # Or a cast followed by a block | ||||
| 153 | push @accum, $next; | ||||
| 154 | $next = $next->next_sibling() or return; | ||||
| 155 | if ( $next->isa( 'PPI::Token::Symbol' ) ) { | ||||
| 156 | defined ( | ||||
| 157 | $allow_subscript = | ||||
| 158 | $allow_subscript_based_on_cast_symbol{ | ||||
| 159 | $accum[-1]->content() | ||||
| 160 | } | ||||
| 161 | ) or return; | ||||
| 162 | push @accum, $next; | ||||
| 163 | } elsif ( $next->isa( 'PPI::Structure::Block' ) ) { | ||||
| 164 | |||||
| 165 | =begin comment | ||||
| 166 | |||||
| 167 | local $_ = $next->content(); | ||||
| 168 | if ( m< \A { / } >smx ) { | ||||
| 169 | push @accum, 3; # Number of characters to accept. | ||||
| 170 | } else { | ||||
| 171 | ## $allow_subscript = $accum[-1]->content() ne '$#'; | ||||
| 172 | push @accum, $next; | ||||
| 173 | } | ||||
| 174 | |||||
| 175 | =end comment | ||||
| 176 | |||||
| 177 | =cut | ||||
| 178 | |||||
| 179 | push @accum, $next; | ||||
| 180 | } else { | ||||
| 181 | return; | ||||
| 182 | } | ||||
| 183 | |||||
| 184 | } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) { | ||||
| 185 | |||||
| 186 | # Or an array index | ||||
| 187 | push @accum, $next; | ||||
| 188 | |||||
| 189 | } else { | ||||
| 190 | |||||
| 191 | # None others need apply. | ||||
| 192 | return; | ||||
| 193 | |||||
| 194 | } | ||||
| 195 | |||||
| 196 | # The interpolation _may_ be subscripted. If so ... | ||||
| 197 | { | ||||
| 198 | |||||
| 199 | # Only accept a subscript if wanted and available | ||||
| 200 | $allow_subscript and $next = $next->snext_sibling() or last; | ||||
| 201 | |||||
| 202 | # Accept an optional dereference operator. | ||||
| 203 | my @subscr; | ||||
| 204 | if ( $next->isa( 'PPI::Token::Operator' ) ) { | ||||
| 205 | $next->content() eq '->' or last; | ||||
| 206 | push @subscr, $next; | ||||
| 207 | $next = $next->next_sibling() or last; | ||||
| 208 | } | ||||
| 209 | |||||
| 210 | # Accept only a subscript | ||||
| 211 | $next->isa( 'PPI::Structure::Subscript' ) or last; | ||||
| 212 | |||||
| 213 | # The subscript must have a closing delimiter. | ||||
| 214 | $next->finish() or last; | ||||
| 215 | |||||
| 216 | # If we are in a regular expression rather than a replacement | ||||
| 217 | # string, screen the subscript for content, since [] could be a | ||||
| 218 | # character class, and {} could be a quantifier. The perlop docs | ||||
| 219 | # say that Perl applies undocumented heuristics subject to | ||||
| 220 | # change without notice to figure this out. So we do our poor | ||||
| 221 | # best to be heuristical and undocumented. | ||||
| 222 | not $in_regexp or $class->_subscript( $next ) or last; | ||||
| 223 | |||||
| 224 | # If we got this far, accept the subscript and try for another | ||||
| 225 | # one. | ||||
| 226 | push @accum, @subscr, $next; | ||||
| 227 | redo; | ||||
| 228 | } | ||||
| 229 | |||||
| 230 | # Compute the length of all the PPI elements accumulated, and return | ||||
| 231 | # it. | ||||
| 232 | my $length = 0; | ||||
| 233 | foreach ( @accum ) { | ||||
| 234 | $length += ref $_ ? length $_->content() : $_; | ||||
| 235 | } | ||||
| 236 | return $length; | ||||
| 237 | } | ||||
| 238 | |||||
| 239 | { | ||||
| 240 | |||||
| 241 | 2 | 2µs | my %allowed = ( | ||
| 242 | '[' => '_square', | ||||
| 243 | '{' => '_curly', | ||||
| 244 | ); | ||||
| 245 | |||||
| 246 | sub _subscript { | ||||
| 247 | my ( $class, $struct ) = @_; | ||||
| 248 | |||||
| 249 | # We expect to have a left delimiter, which is either a '[' or a | ||||
| 250 | # '{'. | ||||
| 251 | my $left = $struct->start() or return; | ||||
| 252 | my $lc = $left->content(); | ||||
| 253 | my $handler = $allowed{$lc} or return; | ||||
| 254 | |||||
| 255 | # We expect a single child, which is a PPI::Statement | ||||
| 256 | ( my @kids = $struct->schildren() ) == 1 or return; | ||||
| 257 | $kids[0]->isa( 'PPI::Statement' ) or return; | ||||
| 258 | |||||
| 259 | # We expect the statement to have at least one child. | ||||
| 260 | ( @kids = $kids[0]->schildren() ) or return; | ||||
| 261 | |||||
| 262 | return $class->$handler( @kids ); | ||||
| 263 | |||||
| 264 | } | ||||
| 265 | |||||
| 266 | } | ||||
| 267 | |||||
| 268 | # Return true if we think a curly-bracketed subscript is really a | ||||
| 269 | # subscript, rather than a quantifier. | ||||
| 270 | sub _curly { | ||||
| 271 | my ( $class, @kids ) = @_; | ||||
| 272 | |||||
| 273 | # If the first child is a word, and either it is an only child or | ||||
| 274 | # the next child is the fat comma operator, we accept it as a | ||||
| 275 | # subscript. | ||||
| 276 | if ( $kids[0]->isa( 'PPI::Token::Word' ) ) { | ||||
| 277 | @kids == 1 and return 1; | ||||
| 278 | $kids[1]->isa( 'PPI::Token::Operator' ) | ||||
| 279 | and $kids[1]->content() eq '=>' | ||||
| 280 | and return 1; | ||||
| 281 | } | ||||
| 282 | |||||
| 283 | # If we have exactly one child which is a symbol, we accept it as a | ||||
| 284 | # subscript. | ||||
| 285 | @kids == 1 | ||||
| 286 | and $kids[0]->isa( 'PPI::Token::Symbol' ) | ||||
| 287 | and return 1; | ||||
| 288 | |||||
| 289 | # We reject anything else. | ||||
| 290 | return; | ||||
| 291 | } | ||||
| 292 | |||||
| 293 | # Return true if we think a square-bracketed subscript is really a | ||||
| 294 | # subscript, rather than a character class. | ||||
| 295 | sub _square { | ||||
| 296 | my ( $class, @kids ) = @_; | ||||
| 297 | |||||
| 298 | # We expect to have either a number or a symbol as the first | ||||
| 299 | # element. | ||||
| 300 | $kids[0]->isa( 'PPI::Token::Number' ) and return 1; | ||||
| 301 | $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1; | ||||
| 302 | |||||
| 303 | # Anything else is rejected. | ||||
| 304 | return; | ||||
| 305 | } | ||||
| 306 | |||||
| 307 | { | ||||
| 308 | |||||
| 309 | 2 | 800ns | my %default = ( | ||
| 310 | perl_version_introduced => MINIMUM_PERL, | ||||
| 311 | ); | ||||
| 312 | |||||
| 313 | sub __PPIX_TOKEN__post_make { | ||||
| 314 | my ( $self, $tokenizer, $arg ) = @_; | ||||
| 315 | |||||
| 316 | # If we're manufacturing objects directly (which is UNSUPPORTED, | ||||
| 317 | # but used in t/version.t) we may not have a $tokenizer. | ||||
| 318 | $tokenizer | ||||
| 319 | and $tokenizer->cookie( COOKIE_REGEX_SET ) | ||||
| 320 | and $self->{perl_version_introduced} = '5.017009'; | ||||
| 321 | |||||
| 322 | $self->__impose_defaults( $arg, \%default ); | ||||
| 323 | |||||
| 324 | return; | ||||
| 325 | } | ||||
| 326 | |||||
| 327 | } | ||||
| 328 | |||||
| 329 | # Alternate classes for the sigils, depending on whether we are in a | ||||
| 330 | # character class (index 1) or not (index 0). | ||||
| 331 | 1 | 2µs | my %sigil_alternate = ( | ||
| 332 | '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ], | ||||
| 333 | '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ], | ||||
| 334 | ); | ||||
| 335 | |||||
| 336 | sub __PPIX_TOKENIZER__regexp { | ||||
| 337 | my ( $class, $tokenizer, $character ) = @_; | ||||
| 338 | |||||
| 339 | exists $sigil_alternate{$character} or return; | ||||
| 340 | |||||
| 341 | if ( my $accept = _interpolation( $class, $tokenizer, $character, 1 ) ) { | ||||
| 342 | return $accept; | ||||
| 343 | } | ||||
| 344 | |||||
| 345 | my $alternate = $sigil_alternate{$character} or return; | ||||
| 346 | return $tokenizer->make_token( | ||||
| 347 | 1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] ); | ||||
| 348 | |||||
| 349 | } | ||||
| 350 | |||||
| 351 | sub __PPIX_TOKENIZER__repl { | ||||
| 352 | my ( $class, $tokenizer, $character ) = @_; | ||||
| 353 | |||||
| 354 | exists $sigil_alternate{$character} or return; | ||||
| 355 | |||||
| 356 | if ( my $accept = _interpolation( $class, $tokenizer, $character, 0 ) ) { | ||||
| 357 | return $accept; | ||||
| 358 | } | ||||
| 359 | |||||
| 360 | return $tokenizer->make_token( 1, TOKEN_LITERAL ); | ||||
| 361 | |||||
| 362 | } | ||||
| 363 | |||||
| 364 | 1 | 7µs | 1; | ||
| 365 | |||||
| 366 | __END__ | ||||
sub PPIx::Regexp::Token::Interpolation::CORE:qr; # opcode |