Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Modifier.pm |
Statements | Executed 21 statements in 1.52ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 18µs | 38µs | BEGIN@81 | PPIx::Regexp::Token::Modifier::
1 | 1 | 1 | 11µs | 98µs | BEGIN@84 | PPIx::Regexp::Token::Modifier::
1 | 1 | 1 | 11µs | 48µs | BEGIN@86 | PPIx::Regexp::Token::Modifier::
1 | 1 | 1 | 11µs | 18µs | BEGIN@82 | PPIx::Regexp::Token::Modifier::
1 | 1 | 1 | 10µs | 13µs | __PPIX_TOKEN__recognize | PPIx::Regexp::Token::Modifier::
2 | 1 | 1 | 2µs | 2µs | CORE:qr (opcode) | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__modifier_modify | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKEN__post_make | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | __aggregate_modifiers | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | __asserts | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | _decode | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | _perl_version_introduced | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | asserts | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | can_be_quantified | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | match_semantics | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | modifiers | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | negates | PPIx::Regexp::Token::Modifier::
0 | 0 | 0 | 0s | 0s | perl_version_introduced | PPIx::Regexp::Token::Modifier::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | =head1 NAME | ||||
2 | |||||
3 | PPIx::Regexp::Token::Modifier - Represent modifiers. | ||||
4 | |||||
5 | =head1 SYNOPSIS | ||||
6 | |||||
7 | use PPIx::Regexp::Dumper; | ||||
8 | PPIx::Regexp::Dumper->new( 'qr{foo}smx' ) | ||||
9 | ->print(); | ||||
10 | |||||
11 | The trailing C<smx> will be represented by this class. | ||||
12 | |||||
13 | This class also represents the whole of things like C<(?ismx)>. But the | ||||
14 | modifiers in something like C<(?i:foo)> are represented by a | ||||
15 | L<PPIx::Regexp::Token::GroupType::Modifier|PPIx::Regexp::Token::GroupType::Modifier>. | ||||
16 | |||||
17 | =head1 INHERITANCE | ||||
18 | |||||
19 | C<PPIx::Regexp::Token::Modifier> is a | ||||
20 | L<PPIx::Regexp::Token|PPIx::Regexp::Token>. | ||||
21 | |||||
22 | C<PPIx::Regexp::Token::Modifier> is the parent of | ||||
23 | L<PPIx::Regexp::Token::GroupType::Modifier|PPIx::Regexp::Token::GroupType::Modifier>. | ||||
24 | |||||
25 | =head1 DESCRIPTION | ||||
26 | |||||
27 | This class represents modifier characters at the end of the regular | ||||
28 | expression. For example, in C<qr{foo}smx> this class would represent | ||||
29 | the terminal C<smx>. | ||||
30 | |||||
31 | =head2 The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers | ||||
32 | |||||
33 | The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers, introduced starting in | ||||
34 | Perl 5.13.6, are used to force either Unicode pattern semantics (C<u>), | ||||
35 | locale semantics (C<l>) default semantics (C<d> the traditional Perl | ||||
36 | semantics, which can also mean 'dual' since it means Unicode if the | ||||
37 | string's UTF-8 bit is on, and locale if the UTF-8 bit is off), or | ||||
38 | restricted default semantics (C<a>). These are mutually exclusive, and | ||||
39 | only one can be asserted at a time. Asserting any of these overrides | ||||
40 | the inherited value of any of the others. The C<asserted()> method | ||||
41 | reports as asserted the last one it sees, or none of them if it has seen | ||||
42 | none. | ||||
43 | |||||
44 | For example, given C<PPIx::Regexp::Token::Modifier> C<$elem> | ||||
45 | representing the invalid regular expression fragment C<(?dul)>, | ||||
46 | C<< $elem->asserted( 'l' ) >> would return true, but | ||||
47 | C<< $elem->asserted( 'u' ) >> would return false. Note that | ||||
48 | C<< $elem->negated( 'u' ) >> would also return false, since C<u> is not | ||||
49 | explicitly negated. | ||||
50 | |||||
51 | If C<$elem> represented regular expression fragment C<(?i)>, | ||||
52 | C<< $elem->asserted( 'd' ) >> would return false, since even though C<d> | ||||
53 | represents the default behavior it is not explicitly asserted. | ||||
54 | |||||
55 | =head2 The caret (C<^>) modifier | ||||
56 | |||||
57 | Calling C<^> a modifier is a bit of a misnomer. The C<(?^...)> | ||||
58 | construction was introduced in Perl 5.13.6, to prevent the inheritance | ||||
59 | of modifiers. The documentation calls the caret a shorthand equivalent | ||||
60 | for C<d-imsx>, and that it the way this class handles it. | ||||
61 | |||||
62 | For example, given C<PPIx::Regexp::Token::Modifier> C<$elem> | ||||
63 | representing regular expression fragment C<(?^i)>, | ||||
64 | C<< $elem->asserted( 'd' ) >> would return true, since in the absence of | ||||
65 | an explicit C<l> or C<u> this class considers the C<^> to explicitly | ||||
66 | assert C<d>. | ||||
67 | |||||
68 | B<Note> that if this is retracted before Perl 5.14 is released, this | ||||
69 | support will disappear. See L<PPIx::Regexp/NOTICE> for some explanation. | ||||
70 | |||||
71 | =head1 METHODS | ||||
72 | |||||
73 | This class provides the following public methods. Methods not documented | ||||
74 | here are private, and unsupported in the sense that the author reserves | ||||
75 | the right to change or remove them without notice. | ||||
76 | |||||
77 | =cut | ||||
78 | |||||
79 | package PPIx::Regexp::Token::Modifier; | ||||
80 | |||||
81 | 2 | 31µs | 2 | 57µs | # spent 38µs (18+19) within PPIx::Regexp::Token::Modifier::BEGIN@81 which was called:
# once (18µs+19µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 81 # spent 38µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@81
# spent 19µs making 1 call to strict::import |
82 | 2 | 40µs | 2 | 25µs | # spent 18µs (11+7) within PPIx::Regexp::Token::Modifier::BEGIN@82 which was called:
# once (11µs+7µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 82 # spent 18µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@82
# spent 7µs making 1 call to warnings::import |
83 | |||||
84 | 2 | 46µs | 2 | 98µs | # spent 98µs (11+86) within PPIx::Regexp::Token::Modifier::BEGIN@84 which was called:
# once (11µs+86µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 84 # spent 98µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@84
# spent 86µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 86µs |
85 | |||||
86 | 1 | 500ns | # spent 48µs (11+37) within PPIx::Regexp::Token::Modifier::BEGIN@86 which was called:
# once (11µs+37µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 89 | ||
87 | MINIMUM_PERL | ||||
88 | MODIFIER_GROUP_MATCH_SEMANTICS | ||||
89 | 1 | 1.36ms | 2 | 86µs | }; # spent 48µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@86
# spent 37µs making 1 call to Exporter::import |
90 | |||||
91 | 1 | 1µs | our $VERSION = '0.036'; | ||
92 | |||||
93 | # Define modifiers that are to be aggregated internally for ease of | ||||
94 | # computation. | ||||
95 | 1 | 4µs | my %aggregate = ( | ||
96 | a => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
97 | aa => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
98 | d => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
99 | l => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
100 | u => MODIFIER_GROUP_MATCH_SEMANTICS, | ||||
101 | ); | ||||
102 | 1 | 300ns | my %de_aggregate; | ||
103 | 1 | 3µs | foreach my $value ( values %aggregate ) { | ||
104 | 5 | 4µs | $de_aggregate{$value}++; | ||
105 | } | ||||
106 | |||||
107 | =head2 asserts | ||||
108 | |||||
109 | $token->asserts( 'i' ) and print "token asserts i"; | ||||
110 | foreach ( $token->asserts() ) { print "token asserts $_\n" } | ||||
111 | |||||
112 | This method returns true if the token explicitly asserts the given | ||||
113 | modifier. The example would return true for the modifier in | ||||
114 | C<(?i:foo)>, but false for C<(?-i:foo)>. | ||||
115 | |||||
116 | If called without an argument, or with an undef argument, all modifiers | ||||
117 | explicitly asserted by this token are returned. | ||||
118 | |||||
119 | =cut | ||||
120 | |||||
121 | sub asserts { | ||||
122 | my ( $self, $modifier ) = @_; | ||||
123 | $self->{modifiers} ||= $self->_decode(); | ||||
124 | if ( defined $modifier ) { | ||||
125 | return __asserts( $self->{modifiers}, $modifier ); | ||||
126 | } else { | ||||
127 | return ( sort grep { defined $_ && $self->{modifiers}{$_} } | ||||
128 | map { $de_aggregate{$_} ? $self->{modifiers}{$_} : $_ } | ||||
129 | keys %{ $self->{modifiers} } ); | ||||
130 | } | ||||
131 | } | ||||
132 | |||||
133 | # This is a kluge for both determining whether the object asserts | ||||
134 | # modifiers (hence the 'ductype') and determining whether the given | ||||
135 | # modifier is actually asserted. The signature is the invocant and the | ||||
136 | # modifier name, which must not be undef. The return is a boolean. | ||||
137 | 1 | 2µs | *__ducktype_modifier_asserted = \&asserts; | ||
138 | |||||
139 | sub __asserts { | ||||
140 | my ( $present, $modifier ) = @_; | ||||
141 | my $bin = $aggregate{$modifier} | ||||
142 | or return $present->{$modifier}; | ||||
143 | return defined $present->{$bin} && $modifier eq $present->{$bin}; | ||||
144 | } | ||||
145 | |||||
146 | sub can_be_quantified { return }; | ||||
147 | |||||
148 | =head2 match_semantics | ||||
149 | |||||
150 | my $sem = $token->match_semantics(); | ||||
151 | defined $sem or $sem = 'undefined'; | ||||
152 | print "This token has $sem match semantics\n"; | ||||
153 | |||||
154 | This method returns the match semantics asserted by the token, as one of | ||||
155 | the strings C<'a'>, C<'aa'>, C<'d'>, C<'l'>, or C<'u'>. If no explicit | ||||
156 | match semantics are asserted, this method returns C<undef>. | ||||
157 | |||||
158 | =cut | ||||
159 | |||||
160 | sub match_semantics { | ||||
161 | my ( $self ) = @_; | ||||
162 | $self->{modifiers} ||= $self->_decode(); | ||||
163 | return $self->{modifiers}{ MODIFIER_GROUP_MATCH_SEMANTICS() }; | ||||
164 | } | ||||
165 | |||||
166 | =head2 modifiers | ||||
167 | |||||
168 | my %mods = $token->modifiers(); | ||||
169 | |||||
170 | Returns all modifiers asserted or negated by this token, and the values | ||||
171 | set (true for asserted, false for negated). If called in scalar context, | ||||
172 | returns a reference to a hash containing the values. | ||||
173 | |||||
174 | =cut | ||||
175 | |||||
176 | sub modifiers { | ||||
177 | my ( $self ) = @_; | ||||
178 | $self->{modifiers} ||= $self->_decode(); | ||||
179 | my %mods = %{ $self->{modifiers} }; | ||||
180 | foreach my $bin ( keys %de_aggregate ) { | ||||
181 | defined ( my $val = delete $mods{$bin} ) | ||||
182 | or next; | ||||
183 | $mods{$bin} = $val; | ||||
184 | } | ||||
185 | return wantarray ? %mods : \%mods; | ||||
186 | } | ||||
187 | |||||
188 | =head2 negates | ||||
189 | |||||
190 | $token->negates( 'i' ) and print "token negates i\n"; | ||||
191 | foreach ( $token->negates() ) { print "token negates $_\n" } | ||||
192 | |||||
193 | This method returns true if the token explicitly negates the given | ||||
194 | modifier. The example would return true for the modifier in | ||||
195 | C<(?-i:foo)>, but false for C<(?i:foo)>. | ||||
196 | |||||
197 | If called without an argument, or with an undef argument, all modifiers | ||||
198 | explicitly negated by this token are returned. | ||||
199 | |||||
200 | =cut | ||||
201 | |||||
202 | sub negates { | ||||
203 | my ( $self, $modifier ) = @_; | ||||
204 | $self->{modifiers} ||= $self->_decode(); | ||||
205 | # Note that since the values of hash entries that represent | ||||
206 | # aggregated modifiers will never be false (at least, not unless '0' | ||||
207 | # becomes a modifier) we need no special logic to handle them. | ||||
208 | defined $modifier | ||||
209 | or return ( sort grep { ! $self->{modifiers}{$_} } | ||||
210 | keys %{ $self->{modifiers} } ); | ||||
211 | return exists $self->{modifiers}{$modifier} | ||||
212 | && ! $self->{modifiers}{$modifier}; | ||||
213 | } | ||||
214 | |||||
215 | sub perl_version_introduced { | ||||
216 | my ( $self ) = @_; | ||||
217 | return ( $self->{perl_version_introduced} ||= | ||||
218 | $self->_perl_version_introduced() ); | ||||
219 | } | ||||
220 | |||||
221 | sub _perl_version_introduced { | ||||
222 | my ( $self ) = @_; | ||||
223 | my $content = $self->content(); | ||||
224 | my $is_statement_modifier = ( $content !~ m/ \A [(]? [?] /smx ); | ||||
225 | my $match_semantics = $self->match_semantics(); | ||||
226 | |||||
227 | # Match semantics modifiers became available as regular expression | ||||
228 | # modifiers in 5.13.10. | ||||
229 | defined $match_semantics | ||||
230 | and $is_statement_modifier | ||||
231 | and return '5.013010'; | ||||
232 | |||||
233 | # /aa was introduced in 5.13.10. | ||||
234 | defined $match_semantics | ||||
235 | and 'aa' eq $match_semantics | ||||
236 | and return '5.013010'; | ||||
237 | |||||
238 | # /a was introduced in 5.13.9, but only in (?...), not as modifier | ||||
239 | # of the entire regular expression. | ||||
240 | defined $match_semantics | ||||
241 | and not $is_statement_modifier | ||||
242 | and 'a' eq $match_semantics | ||||
243 | and return '5.013009'; | ||||
244 | |||||
245 | # /d, /l, and /u were introduced in 5.13.6, but only in (?...), not | ||||
246 | # as modifiers of the entire regular expression. | ||||
247 | defined $match_semantics | ||||
248 | and not $is_statement_modifier | ||||
249 | and return '5.013006'; | ||||
250 | |||||
251 | # The '^' reassert-defaults modifier in embedded modifiers was | ||||
252 | # introduced in 5.13.6. | ||||
253 | not $is_statement_modifier | ||||
254 | and $content =~ m/ \^ /smx | ||||
255 | and return '5.013006'; | ||||
256 | |||||
257 | $self->asserts( 'r' ) and return '5.013002'; | ||||
258 | $self->asserts( 'p' ) and return '5.009005'; | ||||
259 | $self->content() =~ m/ \A [(]? [?] .* - /smx | ||||
260 | and return '5.005'; | ||||
261 | $self->asserts( 'c' ) and return '5.004'; | ||||
262 | return MINIMUM_PERL; | ||||
263 | } | ||||
264 | |||||
265 | # Return true if the token can be quantified, and false otherwise | ||||
266 | # sub can_be_quantified { return }; | ||||
267 | |||||
268 | |||||
269 | # $present => __aggregate_modifiers( 'modifiers', ... ); | ||||
270 | # | ||||
271 | # This subroutine is private to the PPIx::Regexp package. It may change | ||||
272 | # or be retracted without notice. Its purpose is to support defaulted | ||||
273 | # modifiers. | ||||
274 | # | ||||
275 | # Aggregate the given modifiers left-to-right, returning a hash of those | ||||
276 | # present and their values. | ||||
277 | |||||
278 | sub __aggregate_modifiers { | ||||
279 | my ( @mods ) = @_; | ||||
280 | my %present; | ||||
281 | foreach my $content ( @mods ) { | ||||
282 | $content =~ s{ [?/]+ }{}smxg; | ||||
283 | if ( $content =~ m/ \A \^ /smx ) { | ||||
284 | @present{ MODIFIER_GROUP_MATCH_SEMANTICS(), qw{ i s m x } } | ||||
285 | = qw{ d 0 0 0 0 }; | ||||
286 | } | ||||
287 | |||||
288 | # Have to do the global match rather than a split, because the | ||||
289 | # expression modifiers come through here too, and we need to | ||||
290 | # distinguish between s/.../.../e and s/.../.../ee. But the | ||||
291 | # modifiers can be randomized (that is, /eie is the same as | ||||
292 | # /eei), so we reorder the content first. | ||||
293 | $content = join '', sort split qr{}smx, $content; | ||||
294 | my $value = 1; | ||||
295 | while ( $content =~ m/ ( ( [[:alpha:]-] ) \2* ) /smxg ) { | ||||
296 | if ( '-' eq $1 ) { | ||||
297 | $value = 0; | ||||
298 | } elsif ( my $bin = $aggregate{$1} ) { | ||||
299 | # Yes, technically the match semantics stuff can't be | ||||
300 | # negated in a regex. But it can in a 'use re', which | ||||
301 | # also comes through here, so we have to handle it. | ||||
302 | $present{$bin} = $value ? $1 : undef; | ||||
303 | } else { | ||||
304 | $present{$1} = $value; | ||||
305 | } | ||||
306 | } | ||||
307 | } | ||||
308 | return \%present; | ||||
309 | } | ||||
310 | |||||
311 | # This must be implemented by tokens which do not recognize themselves. | ||||
312 | # The return is a list of list references. Each list reference must | ||||
313 | # contain a regular expression that recognizes the token, and optionally | ||||
314 | # a reference to a hash to pass to make_token as the class-specific | ||||
315 | # arguments. The regular expression MUST be anchored to the beginning of | ||||
316 | # the string. | ||||
317 | # spent 13µs (10+2) within PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize which was called:
# once (10µs+2µs) by base::import at line 102 of PPIx/Regexp/Token/Structure.pm | ||||
318 | return ( | ||||
319 | 1 | 15µs | 2 | 2µs | [ qr{ \A [(] [?] [[:lower:]]* -? [[:lower:]]* [)] }smx ], # spent 2µs making 2 calls to PPIx::Regexp::Token::Modifier::CORE:qr, avg 1µs/call |
320 | [ qr{ \A [(] [?] \^ [[:lower:]]* [)] }smx ], | ||||
321 | ); | ||||
322 | } | ||||
323 | |||||
324 | # After the token is made, figure out what it asserts or negates. | ||||
325 | |||||
326 | sub __PPIX_TOKEN__post_make { | ||||
327 | my ( $self, $tokenizer ) = @_; | ||||
328 | defined $tokenizer | ||||
329 | and $tokenizer->modifier_modify( $self->modifiers() ); | ||||
330 | return; | ||||
331 | } | ||||
332 | |||||
333 | { | ||||
334 | |||||
335 | # Called by the tokenizer to modify the current modifiers with a new | ||||
336 | # set. Both are passed as hash references, and a reference to the | ||||
337 | # new hash is returned. | ||||
338 | 1 | 500ns | sub __PPIX_TOKENIZER__modifier_modify { | ||
339 | my ( @args ) = @_; | ||||
340 | |||||
341 | my %merged; | ||||
342 | foreach my $hash ( @args ) { | ||||
343 | while ( my ( $key, $val ) = each %{ $hash } ) { | ||||
344 | if ( $val ) { | ||||
345 | $merged{$key} = $val; | ||||
346 | } else { | ||||
347 | delete $merged{$key}; | ||||
348 | } | ||||
349 | } | ||||
350 | } | ||||
351 | |||||
352 | return \%merged; | ||||
353 | |||||
354 | } | ||||
355 | |||||
356 | # Decode modifiers from the content of the token. | ||||
357 | sub _decode { | ||||
358 | my ( $self ) = @_; | ||||
359 | return __aggregate_modifiers( $self->content() ); | ||||
360 | } | ||||
361 | } | ||||
362 | |||||
363 | 1 | 8µs | 1; | ||
364 | |||||
365 | __END__ | ||||
# spent 2µs within PPIx::Regexp::Token::Modifier::CORE:qr which was called 2 times, avg 1µs/call:
# 2 times (2µs+0s) by PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize at line 319, avg 1µs/call |