| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Modifier.pm |
| Statements | Executed 21 statements in 1.52ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 18µs | 38µs | PPIx::Regexp::Token::Modifier::BEGIN@81 |
| 1 | 1 | 1 | 11µs | 98µs | PPIx::Regexp::Token::Modifier::BEGIN@84 |
| 1 | 1 | 1 | 11µs | 48µs | PPIx::Regexp::Token::Modifier::BEGIN@86 |
| 1 | 1 | 1 | 11µs | 18µs | PPIx::Regexp::Token::Modifier::BEGIN@82 |
| 1 | 1 | 1 | 10µs | 13µs | PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize |
| 2 | 1 | 1 | 2µs | 2µs | PPIx::Regexp::Token::Modifier::CORE:qr (opcode) |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__post_make |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::__aggregate_modifiers |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::__asserts |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::_decode |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::_perl_version_introduced |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::asserts |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::can_be_quantified |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::match_semantics |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::modifiers |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::negates |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::Modifier::perl_version_introduced |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | =head1 NAME | ||||
| 2 | |||||
| 3 | PPIx::Regexp::Token::Modifier - Represent modifiers. | ||||
| 4 | |||||
| 5 | =head1 SYNOPSIS | ||||
| 6 | |||||
| 7 | use PPIx::Regexp::Dumper; | ||||
| 8 | PPIx::Regexp::Dumper->new( 'qr{foo}smx' ) | ||||
| 9 | ->print(); | ||||
| 10 | |||||
| 11 | The trailing C<smx> will be represented by this class. | ||||
| 12 | |||||
| 13 | This class also represents the whole of things like C<(?ismx)>. But the | ||||
| 14 | modifiers in something like C<(?i:foo)> are represented by a | ||||
| 15 | L<PPIx::Regexp::Token::GroupType::Modifier|PPIx::Regexp::Token::GroupType::Modifier>. | ||||
| 16 | |||||
| 17 | =head1 INHERITANCE | ||||
| 18 | |||||
| 19 | C<PPIx::Regexp::Token::Modifier> is a | ||||
| 20 | L<PPIx::Regexp::Token|PPIx::Regexp::Token>. | ||||
| 21 | |||||
| 22 | C<PPIx::Regexp::Token::Modifier> is the parent of | ||||
| 23 | L<PPIx::Regexp::Token::GroupType::Modifier|PPIx::Regexp::Token::GroupType::Modifier>. | ||||
| 24 | |||||
| 25 | =head1 DESCRIPTION | ||||
| 26 | |||||
| 27 | This class represents modifier characters at the end of the regular | ||||
| 28 | expression. For example, in C<qr{foo}smx> this class would represent | ||||
| 29 | the terminal C<smx>. | ||||
| 30 | |||||
| 31 | =head2 The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers | ||||
| 32 | |||||
| 33 | The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers, introduced starting in | ||||
| 34 | Perl 5.13.6, are used to force either Unicode pattern semantics (C<u>), | ||||
| 35 | locale semantics (C<l>) default semantics (C<d> the traditional Perl | ||||
| 36 | semantics, which can also mean 'dual' since it means Unicode if the | ||||
| 37 | string's UTF-8 bit is on, and locale if the UTF-8 bit is off), or | ||||
| 38 | restricted default semantics (C<a>). These are mutually exclusive, and | ||||
| 39 | only one can be asserted at a time. Asserting any of these overrides | ||||
| 40 | the inherited value of any of the others. The C<asserted()> method | ||||
| 41 | reports as asserted the last one it sees, or none of them if it has seen | ||||
| 42 | none. | ||||
| 43 | |||||
| 44 | For example, given C<PPIx::Regexp::Token::Modifier> C<$elem> | ||||
| 45 | representing the invalid regular expression fragment C<(?dul)>, | ||||
| 46 | C<< $elem->asserted( 'l' ) >> would return true, but | ||||
| 47 | C<< $elem->asserted( 'u' ) >> would return false. Note that | ||||
| 48 | C<< $elem->negated( 'u' ) >> would also return false, since C<u> is not | ||||
| 49 | explicitly negated. | ||||
| 50 | |||||
| 51 | If C<$elem> represented regular expression fragment C<(?i)>, | ||||
| 52 | C<< $elem->asserted( 'd' ) >> would return false, since even though C<d> | ||||
| 53 | represents the default behavior it is not explicitly asserted. | ||||
| 54 | |||||
| 55 | =head2 The caret (C<^>) modifier | ||||
| 56 | |||||
| 57 | Calling C<^> a modifier is a bit of a misnomer. The C<(?^...)> | ||||
| 58 | construction was introduced in Perl 5.13.6, to prevent the inheritance | ||||
| 59 | of modifiers. The documentation calls the caret a shorthand equivalent | ||||
| 60 | for C<d-imsx>, and that it the way this class handles it. | ||||
| 61 | |||||
| 62 | For example, given C<PPIx::Regexp::Token::Modifier> C<$elem> | ||||
| 63 | representing regular expression fragment C<(?^i)>, | ||||
| 64 | C<< $elem->asserted( 'd' ) >> would return true, since in the absence of | ||||
| 65 | an explicit C<l> or C<u> this class considers the C<^> to explicitly | ||||
| 66 | assert C<d>. | ||||
| 67 | |||||
| 68 | B<Note> that if this is retracted before Perl 5.14 is released, this | ||||
| 69 | support will disappear. See L<PPIx::Regexp/NOTICE> for some explanation. | ||||
| 70 | |||||
| 71 | =head1 METHODS | ||||
| 72 | |||||
| 73 | This class provides the following public methods. Methods not documented | ||||
| 74 | here are private, and unsupported in the sense that the author reserves | ||||
| 75 | the right to change or remove them without notice. | ||||
| 76 | |||||
| 77 | =cut | ||||
| 78 | |||||
| 79 | package PPIx::Regexp::Token::Modifier; | ||||
| 80 | |||||
| 81 | 2 | 31µs | 2 | 57µs | # spent 38µs (18+19) within PPIx::Regexp::Token::Modifier::BEGIN@81 which was called:
# once (18µs+19µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 81 # spent 38µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@81
# spent 19µs making 1 call to strict::import |
| 82 | 2 | 40µs | 2 | 25µs | # spent 18µs (11+7) within PPIx::Regexp::Token::Modifier::BEGIN@82 which was called:
# once (11µs+7µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 82 # spent 18µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@82
# spent 7µs making 1 call to warnings::import |
| 83 | |||||
| 84 | 2 | 46µs | 2 | 98µs | # spent 98µs (11+86) within PPIx::Regexp::Token::Modifier::BEGIN@84 which was called:
# once (11µs+86µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 84 # spent 98µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@84
# spent 86µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 86µs |
| 85 | |||||
| 86 | 1 | 500ns | # spent 48µs (11+37) within PPIx::Regexp::Token::Modifier::BEGIN@86 which was called:
# once (11µs+37µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 89 | ||
| 87 | MINIMUM_PERL | ||||
| 88 | MODIFIER_GROUP_MATCH_SEMANTICS | ||||
| 89 | 1 | 1.36ms | 2 | 86µs | }; # spent 48µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@86
# spent 37µs making 1 call to Exporter::import |
| 90 | |||||
| 91 | 1 | 1µs | our $VERSION = '0.036'; | ||
| 92 | |||||
| 93 | # Define modifiers that are to be aggregated internally for ease of | ||||
| 94 | # computation. | ||||
| 95 | 1 | 4µs | my %aggregate = ( | ||
| 96 | a => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
| 97 | aa => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
| 98 | d => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
| 99 | l => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
| 100 | u => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
| 101 | ); | ||||
| 102 | 1 | 300ns | my %de_aggregate; | ||
| 103 | 1 | 3µs | foreach my $value ( values %aggregate ) { | ||
| 104 | 5 | 4µs | $de_aggregate{$value}++; | ||
| 105 | } | ||||
| 106 | |||||
| 107 | =head2 asserts | ||||
| 108 | |||||
| 109 | $token->asserts( 'i' ) and print "token asserts i"; | ||||
| 110 | foreach ( $token->asserts() ) { print "token asserts $_\n" } | ||||
| 111 | |||||
| 112 | This method returns true if the token explicitly asserts the given | ||||
| 113 | modifier. The example would return true for the modifier in | ||||
| 114 | C<(?i:foo)>, but false for C<(?-i:foo)>. | ||||
| 115 | |||||
| 116 | If called without an argument, or with an undef argument, all modifiers | ||||
| 117 | explicitly asserted by this token are returned. | ||||
| 118 | |||||
| 119 | =cut | ||||
| 120 | |||||
| 121 | sub asserts { | ||||
| 122 | my ( $self, $modifier ) = @_; | ||||
| 123 | $self->{modifiers} ||= $self->_decode(); | ||||
| 124 | if ( defined $modifier ) { | ||||
| 125 | return __asserts( $self->{modifiers}, $modifier ); | ||||
| 126 | } else { | ||||
| 127 | return ( sort grep { defined $_ && $self->{modifiers}{$_} } | ||||
| 128 | map { $de_aggregate{$_} ? $self->{modifiers}{$_} : $_ } | ||||
| 129 | keys %{ $self->{modifiers} } ); | ||||
| 130 | } | ||||
| 131 | } | ||||
| 132 | |||||
| 133 | # This is a kluge for both determining whether the object asserts | ||||
| 134 | # modifiers (hence the 'ductype') and determining whether the given | ||||
| 135 | # modifier is actually asserted. The signature is the invocant and the | ||||
| 136 | # modifier name, which must not be undef. The return is a boolean. | ||||
| 137 | 1 | 2µs | *__ducktype_modifier_asserted = \&asserts; | ||
| 138 | |||||
| 139 | sub __asserts { | ||||
| 140 | my ( $present, $modifier ) = @_; | ||||
| 141 | my $bin = $aggregate{$modifier} | ||||
| 142 | or return $present->{$modifier}; | ||||
| 143 | return defined $present->{$bin} && $modifier eq $present->{$bin}; | ||||
| 144 | } | ||||
| 145 | |||||
| 146 | sub can_be_quantified { return }; | ||||
| 147 | |||||
| 148 | =head2 match_semantics | ||||
| 149 | |||||
| 150 | my $sem = $token->match_semantics(); | ||||
| 151 | defined $sem or $sem = 'undefined'; | ||||
| 152 | print "This token has $sem match semantics\n"; | ||||
| 153 | |||||
| 154 | This method returns the match semantics asserted by the token, as one of | ||||
| 155 | the strings C<'a'>, C<'aa'>, C<'d'>, C<'l'>, or C<'u'>. If no explicit | ||||
| 156 | match semantics are asserted, this method returns C<undef>. | ||||
| 157 | |||||
| 158 | =cut | ||||
| 159 | |||||
| 160 | sub match_semantics { | ||||
| 161 | my ( $self ) = @_; | ||||
| 162 | $self->{modifiers} ||= $self->_decode(); | ||||
| 163 | return $self->{modifiers}{ MODIFIER_GROUP_MATCH_SEMANTICS() }; | ||||
| 164 | } | ||||
| 165 | |||||
| 166 | =head2 modifiers | ||||
| 167 | |||||
| 168 | my %mods = $token->modifiers(); | ||||
| 169 | |||||
| 170 | Returns all modifiers asserted or negated by this token, and the values | ||||
| 171 | set (true for asserted, false for negated). If called in scalar context, | ||||
| 172 | returns a reference to a hash containing the values. | ||||
| 173 | |||||
| 174 | =cut | ||||
| 175 | |||||
| 176 | sub modifiers { | ||||
| 177 | my ( $self ) = @_; | ||||
| 178 | $self->{modifiers} ||= $self->_decode(); | ||||
| 179 | my %mods = %{ $self->{modifiers} }; | ||||
| 180 | foreach my $bin ( keys %de_aggregate ) { | ||||
| 181 | defined ( my $val = delete $mods{$bin} ) | ||||
| 182 | or next; | ||||
| 183 | $mods{$bin} = $val; | ||||
| 184 | } | ||||
| 185 | return wantarray ? %mods : \%mods; | ||||
| 186 | } | ||||
| 187 | |||||
| 188 | =head2 negates | ||||
| 189 | |||||
| 190 | $token->negates( 'i' ) and print "token negates i\n"; | ||||
| 191 | foreach ( $token->negates() ) { print "token negates $_\n" } | ||||
| 192 | |||||
| 193 | This method returns true if the token explicitly negates the given | ||||
| 194 | modifier. The example would return true for the modifier in | ||||
| 195 | C<(?-i:foo)>, but false for C<(?i:foo)>. | ||||
| 196 | |||||
| 197 | If called without an argument, or with an undef argument, all modifiers | ||||
| 198 | explicitly negated by this token are returned. | ||||
| 199 | |||||
| 200 | =cut | ||||
| 201 | |||||
| 202 | sub negates { | ||||
| 203 | my ( $self, $modifier ) = @_; | ||||
| 204 | $self->{modifiers} ||= $self->_decode(); | ||||
| 205 | # Note that since the values of hash entries that represent | ||||
| 206 | # aggregated modifiers will never be false (at least, not unless '0' | ||||
| 207 | # becomes a modifier) we need no special logic to handle them. | ||||
| 208 | defined $modifier | ||||
| 209 | or return ( sort grep { ! $self->{modifiers}{$_} } | ||||
| 210 | keys %{ $self->{modifiers} } ); | ||||
| 211 | return exists $self->{modifiers}{$modifier} | ||||
| 212 | && ! $self->{modifiers}{$modifier}; | ||||
| 213 | } | ||||
| 214 | |||||
| 215 | sub perl_version_introduced { | ||||
| 216 | my ( $self ) = @_; | ||||
| 217 | return ( $self->{perl_version_introduced} ||= | ||||
| 218 | $self->_perl_version_introduced() ); | ||||
| 219 | } | ||||
| 220 | |||||
| 221 | sub _perl_version_introduced { | ||||
| 222 | my ( $self ) = @_; | ||||
| 223 | my $content = $self->content(); | ||||
| 224 | my $is_statement_modifier = ( $content !~ m/ \A [(]? [?] /smx ); | ||||
| 225 | my $match_semantics = $self->match_semantics(); | ||||
| 226 | |||||
| 227 | # Match semantics modifiers became available as regular expression | ||||
| 228 | # modifiers in 5.13.10. | ||||
| 229 | defined $match_semantics | ||||
| 230 | and $is_statement_modifier | ||||
| 231 | and return '5.013010'; | ||||
| 232 | |||||
| 233 | # /aa was introduced in 5.13.10. | ||||
| 234 | defined $match_semantics | ||||
| 235 | and 'aa' eq $match_semantics | ||||
| 236 | and return '5.013010'; | ||||
| 237 | |||||
| 238 | # /a was introduced in 5.13.9, but only in (?...), not as modifier | ||||
| 239 | # of the entire regular expression. | ||||
| 240 | defined $match_semantics | ||||
| 241 | and not $is_statement_modifier | ||||
| 242 | and 'a' eq $match_semantics | ||||
| 243 | and return '5.013009'; | ||||
| 244 | |||||
| 245 | # /d, /l, and /u were introduced in 5.13.6, but only in (?...), not | ||||
| 246 | # as modifiers of the entire regular expression. | ||||
| 247 | defined $match_semantics | ||||
| 248 | and not $is_statement_modifier | ||||
| 249 | and return '5.013006'; | ||||
| 250 | |||||
| 251 | # The '^' reassert-defaults modifier in embedded modifiers was | ||||
| 252 | # introduced in 5.13.6. | ||||
| 253 | not $is_statement_modifier | ||||
| 254 | and $content =~ m/ \^ /smx | ||||
| 255 | and return '5.013006'; | ||||
| 256 | |||||
| 257 | $self->asserts( 'r' ) and return '5.013002'; | ||||
| 258 | $self->asserts( 'p' ) and return '5.009005'; | ||||
| 259 | $self->content() =~ m/ \A [(]? [?] .* - /smx | ||||
| 260 | and return '5.005'; | ||||
| 261 | $self->asserts( 'c' ) and return '5.004'; | ||||
| 262 | return MINIMUM_PERL; | ||||
| 263 | } | ||||
| 264 | |||||
| 265 | # Return true if the token can be quantified, and false otherwise | ||||
| 266 | # sub can_be_quantified { return }; | ||||
| 267 | |||||
| 268 | |||||
| 269 | # $present => __aggregate_modifiers( 'modifiers', ... ); | ||||
| 270 | # | ||||
| 271 | # This subroutine is private to the PPIx::Regexp package. It may change | ||||
| 272 | # or be retracted without notice. Its purpose is to support defaulted | ||||
| 273 | # modifiers. | ||||
| 274 | # | ||||
| 275 | # Aggregate the given modifiers left-to-right, returning a hash of those | ||||
| 276 | # present and their values. | ||||
| 277 | |||||
| 278 | sub __aggregate_modifiers { | ||||
| 279 | my ( @mods ) = @_; | ||||
| 280 | my %present; | ||||
| 281 | foreach my $content ( @mods ) { | ||||
| 282 | $content =~ s{ [?/]+ }{}smxg; | ||||
| 283 | if ( $content =~ m/ \A \^ /smx ) { | ||||
| 284 | @present{ MODIFIER_GROUP_MATCH_SEMANTICS(), qw{ i s m x } } | ||||
| 285 | = qw{ d 0 0 0 0 }; | ||||
| 286 | } | ||||
| 287 | |||||
| 288 | # Have to do the global match rather than a split, because the | ||||
| 289 | # expression modifiers come through here too, and we need to | ||||
| 290 | # distinguish between s/.../.../e and s/.../.../ee. But the | ||||
| 291 | # modifiers can be randomized (that is, /eie is the same as | ||||
| 292 | # /eei), so we reorder the content first. | ||||
| 293 | $content = join '', sort split qr{}smx, $content; | ||||
| 294 | my $value = 1; | ||||
| 295 | while ( $content =~ m/ ( ( [[:alpha:]-] ) \2* ) /smxg ) { | ||||
| 296 | if ( '-' eq $1 ) { | ||||
| 297 | $value = 0; | ||||
| 298 | } elsif ( my $bin = $aggregate{$1} ) { | ||||
| 299 | # Yes, technically the match semantics stuff can't be | ||||
| 300 | # negated in a regex. But it can in a 'use re', which | ||||
| 301 | # also comes through here, so we have to handle it. | ||||
| 302 | $present{$bin} = $value ? $1 : undef; | ||||
| 303 | } else { | ||||
| 304 | $present{$1} = $value; | ||||
| 305 | } | ||||
| 306 | } | ||||
| 307 | } | ||||
| 308 | return \%present; | ||||
| 309 | } | ||||
| 310 | |||||
| 311 | # This must be implemented by tokens which do not recognize themselves. | ||||
| 312 | # The return is a list of list references. Each list reference must | ||||
| 313 | # contain a regular expression that recognizes the token, and optionally | ||||
| 314 | # a reference to a hash to pass to make_token as the class-specific | ||||
| 315 | # arguments. The regular expression MUST be anchored to the beginning of | ||||
| 316 | # the string. | ||||
| 317 | # spent 13µs (10+2) within PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize which was called:
# once (10µs+2µs) by base::import at line 102 of PPIx/Regexp/Token/Structure.pm | ||||
| 318 | return ( | ||||
| 319 | 1 | 15µs | 2 | 2µs | [ qr{ \A [(] [?] [[:lower:]]* -? [[:lower:]]* [)] }smx ], # spent 2µs making 2 calls to PPIx::Regexp::Token::Modifier::CORE:qr, avg 1µs/call |
| 320 | [ qr{ \A [(] [?] \^ [[:lower:]]* [)] }smx ], | ||||
| 321 | ); | ||||
| 322 | } | ||||
| 323 | |||||
| 324 | # After the token is made, figure out what it asserts or negates. | ||||
| 325 | |||||
| 326 | sub __PPIX_TOKEN__post_make { | ||||
| 327 | my ( $self, $tokenizer ) = @_; | ||||
| 328 | defined $tokenizer | ||||
| 329 | and $tokenizer->modifier_modify( $self->modifiers() ); | ||||
| 330 | return; | ||||
| 331 | } | ||||
| 332 | |||||
| 333 | { | ||||
| 334 | |||||
| 335 | # Called by the tokenizer to modify the current modifiers with a new | ||||
| 336 | # set. Both are passed as hash references, and a reference to the | ||||
| 337 | # new hash is returned. | ||||
| 338 | 1 | 500ns | sub __PPIX_TOKENIZER__modifier_modify { | ||
| 339 | my ( @args ) = @_; | ||||
| 340 | |||||
| 341 | my %merged; | ||||
| 342 | foreach my $hash ( @args ) { | ||||
| 343 | while ( my ( $key, $val ) = each %{ $hash } ) { | ||||
| 344 | if ( $val ) { | ||||
| 345 | $merged{$key} = $val; | ||||
| 346 | } else { | ||||
| 347 | delete $merged{$key}; | ||||
| 348 | } | ||||
| 349 | } | ||||
| 350 | } | ||||
| 351 | |||||
| 352 | return \%merged; | ||||
| 353 | |||||
| 354 | } | ||||
| 355 | |||||
| 356 | # Decode modifiers from the content of the token. | ||||
| 357 | sub _decode { | ||||
| 358 | my ( $self ) = @_; | ||||
| 359 | return __aggregate_modifiers( $self->content() ); | ||||
| 360 | } | ||||
| 361 | } | ||||
| 362 | |||||
| 363 | 1 | 8µs | 1; | ||
| 364 | |||||
| 365 | __END__ | ||||
# spent 2µs within PPIx::Regexp::Token::Modifier::CORE:qr which was called 2 times, avg 1µs/call:
# 2 times (2µs+0s) by PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize at line 319, avg 1µs/call |