Profile of PPIx/Regexp/Token/Modifier.pm

Filename	/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Modifier.pm
Statements	Executed 21 statements in 1.52ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	18µs	38µs	PPIx::Regexp::Token::Modifier::BEGIN@81
1	1	1	11µs	98µs	PPIx::Regexp::Token::Modifier::BEGIN@84
1	1	1	11µs	48µs	PPIx::Regexp::Token::Modifier::BEGIN@86
1	1	1	11µs	18µs	PPIx::Regexp::Token::Modifier::BEGIN@82
1	1	1	10µs	13µs	PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize
2	1	1	2µs	2µs	PPIx::Regexp::Token::Modifier::CORE:qr (opcode)
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__post_make
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::__aggregate_modifiers
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::__asserts
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::_decode
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::_perl_version_introduced
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::asserts
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::can_be_quantified
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::match_semantics
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::modifiers
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::negates
0	0	0	0s	0s	PPIx::Regexp::Token::Modifier::perl_version_introduced

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					=head1 NAME
2
3					PPIx::Regexp::Token::Modifier - Represent modifiers.
4
5					=head1 SYNOPSIS
6
7					use PPIx::Regexp::Dumper;
8					PPIx::Regexp::Dumper->new( 'qr{foo}smx' )
9					->print();
10
11					The trailing C<smx> will be represented by this class.
12
13					This class also represents the whole of things like C<(?ismx)>. But the
14					modifiers in something like C<(?i:foo)> are represented by a
15					L<PPIx::Regexp::Token::GroupType::Modifier\|PPIx::Regexp::Token::GroupType::Modifier>.
16
17					=head1 INHERITANCE
18
19					C<PPIx::Regexp::Token::Modifier> is a
20					L<PPIx::Regexp::Token\|PPIx::Regexp::Token>.
21
22					C<PPIx::Regexp::Token::Modifier> is the parent of
23					L<PPIx::Regexp::Token::GroupType::Modifier\|PPIx::Regexp::Token::GroupType::Modifier>.
24
25					=head1 DESCRIPTION
26
27					This class represents modifier characters at the end of the regular
28					expression. For example, in C<qr{foo}smx> this class would represent
29					the terminal C<smx>.
30
31					=head2 The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers
32
33					The C<a>, C<aa>, C<d>, C<l>, and C<u> modifiers, introduced starting in
34					Perl 5.13.6, are used to force either Unicode pattern semantics (C<u>),
35					locale semantics (C<l>) default semantics (C<d> the traditional Perl
36					semantics, which can also mean 'dual' since it means Unicode if the
37					string's UTF-8 bit is on, and locale if the UTF-8 bit is off), or
38					restricted default semantics (C<a>). These are mutually exclusive, and
39					only one can be asserted at a time. Asserting any of these overrides
40					the inherited value of any of the others. The C<asserted()> method
41					reports as asserted the last one it sees, or none of them if it has seen
42					none.
43
44					For example, given C<PPIx::Regexp::Token::Modifier> C<$elem>
45					representing the invalid regular expression fragment C<(?dul)>,
46					C<< $elem->asserted( 'l' ) >> would return true, but
47					C<< $elem->asserted( 'u' ) >> would return false. Note that
48					C<< $elem->negated( 'u' ) >> would also return false, since C<u> is not
49					explicitly negated.
50
51					If C<$elem> represented regular expression fragment C<(?i)>,
52					C<< $elem->asserted( 'd' ) >> would return false, since even though C<d>
53					represents the default behavior it is not explicitly asserted.
54
55					=head2 The caret (C<^>) modifier
56
57					Calling C<^> a modifier is a bit of a misnomer. The C<(?^...)>
58					construction was introduced in Perl 5.13.6, to prevent the inheritance
59					of modifiers. The documentation calls the caret a shorthand equivalent
60					for C<d-imsx>, and that it the way this class handles it.
61
62					For example, given C<PPIx::Regexp::Token::Modifier> C<$elem>
63					representing regular expression fragment C<(?^i)>,
64					C<< $elem->asserted( 'd' ) >> would return true, since in the absence of
65					an explicit C<l> or C<u> this class considers the C<^> to explicitly
66					assert C<d>.
67
68					B<Note> that if this is retracted before Perl 5.14 is released, this
69					support will disappear. See L<PPIx::Regexp/NOTICE> for some explanation.
70
71					=head1 METHODS
72
73					This class provides the following public methods. Methods not documented
74					here are private, and unsupported in the sense that the author reserves
75					the right to change or remove them without notice.
76
77					=cut
78
79					package PPIx::Regexp::Token::Modifier;
80
81	2	31µs	2	57µs	# spent 38µs (18+19) within PPIx::Regexp::Token::Modifier::BEGIN@81 which was called: # once (18µs+19µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 81 use strict; # spent 38µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@81 # spent 19µs making 1 call to strict::import
82	2	40µs	2	25µs	# spent 18µs (11+7) within PPIx::Regexp::Token::Modifier::BEGIN@82 which was called: # once (11µs+7µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 82 use warnings; # spent 18µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@82 # spent 7µs making 1 call to warnings::import
83
84	2	46µs	2	98µs	# spent 98µs (11+86) within PPIx::Regexp::Token::Modifier::BEGIN@84 which was called: # once (11µs+86µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 84 use base qw{ PPIx::Regexp::Token }; # spent 98µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@84 # spent 86µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 86µs
85
86	1	500ns			# spent 48µs (11+37) within PPIx::Regexp::Token::Modifier::BEGIN@86 which was called: # once (11µs+37µs) by PPIx::Regexp::Token::Structure::BEGIN@51 at line 89 use PPIx::Regexp::Constant qw{
87					MINIMUM_PERL
88					MODIFIER_GROUP_MATCH_SEMANTICS
89	1	1.36ms	2	86µs	}; # spent 48µs making 1 call to PPIx::Regexp::Token::Modifier::BEGIN@86 # spent 37µs making 1 call to Exporter::import
90
91	1	1µs			our $VERSION = '0.036';
92
93					# Define modifiers that are to be aggregated internally for ease of
94					# computation.
95	1	4µs			my %aggregate = (
96					a => MODIFIER_GROUP_MATCH_SEMANTICS,
97					aa => MODIFIER_GROUP_MATCH_SEMANTICS,
98					d => MODIFIER_GROUP_MATCH_SEMANTICS,
99					l => MODIFIER_GROUP_MATCH_SEMANTICS,
100					u => MODIFIER_GROUP_MATCH_SEMANTICS,
101					);
102	1	300ns			my %de_aggregate;
103	1	3µs			foreach my $value ( values %aggregate ) {
104	5	4µs			$de_aggregate{$value}++;
105					}
106
107					=head2 asserts
108
109					$token->asserts( 'i' ) and print "token asserts i";
110					foreach ( $token->asserts() ) { print "token asserts $_\n" }
111
112					This method returns true if the token explicitly asserts the given
113					modifier. The example would return true for the modifier in
114					C<(?i:foo)>, but false for C<(?-i:foo)>.
115
116					If called without an argument, or with an undef argument, all modifiers
117					explicitly asserted by this token are returned.
118
119					=cut
120
121					sub asserts {
122					my ( $self, $modifier ) = @_;
123					$self->{modifiers} \|\|= $self->_decode();
124					if ( defined $modifier ) {
125					return __asserts( $self->{modifiers}, $modifier );
126					} else {
127					return ( sort grep { defined $_ && $self->{modifiers}{$_} }
128					map { $de_aggregate{$_} ? $self->{modifiers}{$_} : $_ }
129					keys %{ $self->{modifiers} } );
130					}
131					}
132
133					# This is a kluge for both determining whether the object asserts
134					# modifiers (hence the 'ductype') and determining whether the given
135					# modifier is actually asserted. The signature is the invocant and the
136					# modifier name, which must not be undef. The return is a boolean.
137	1	2µs			*__ducktype_modifier_asserted = \&asserts;
138
139					sub __asserts {
140					my ( $present, $modifier ) = @_;
141					my $bin = $aggregate{$modifier}
142					or return $present->{$modifier};
143					return defined $present->{$bin} && $modifier eq $present->{$bin};
144					}
145
146					sub can_be_quantified { return };
147
148					=head2 match_semantics
149
150					my $sem = $token->match_semantics();
151					defined $sem or $sem = 'undefined';
152					print "This token has $sem match semantics\n";
153
154					This method returns the match semantics asserted by the token, as one of
155					the strings C<'a'>, C<'aa'>, C<'d'>, C<'l'>, or C<'u'>. If no explicit
156					match semantics are asserted, this method returns C<undef>.
157
158					=cut
159
160					sub match_semantics {
161					my ( $self ) = @_;
162					$self->{modifiers} \|\|= $self->_decode();
163					return $self->{modifiers}{ MODIFIER_GROUP_MATCH_SEMANTICS() };
164					}
165
166					=head2 modifiers
167
168					my %mods = $token->modifiers();
169
170					Returns all modifiers asserted or negated by this token, and the values
171					set (true for asserted, false for negated). If called in scalar context,
172					returns a reference to a hash containing the values.
173
174					=cut
175
176					sub modifiers {
177					my ( $self ) = @_;
178					$self->{modifiers} \|\|= $self->_decode();
179					my %mods = %{ $self->{modifiers} };
180					foreach my $bin ( keys %de_aggregate ) {
181					defined ( my $val = delete $mods{$bin} )
182					or next;
183					$mods{$bin} = $val;
184					}
185					return wantarray ? %mods : \%mods;
186					}
187
188					=head2 negates
189
190					$token->negates( 'i' ) and print "token negates i\n";
191					foreach ( $token->negates() ) { print "token negates $_\n" }
192
193					This method returns true if the token explicitly negates the given
194					modifier. The example would return true for the modifier in
195					C<(?-i:foo)>, but false for C<(?i:foo)>.
196
197					If called without an argument, or with an undef argument, all modifiers
198					explicitly negated by this token are returned.
199
200					=cut
201
202					sub negates {
203					my ( $self, $modifier ) = @_;
204					$self->{modifiers} \|\|= $self->_decode();
205					# Note that since the values of hash entries that represent
206					# aggregated modifiers will never be false (at least, not unless '0'
207					# becomes a modifier) we need no special logic to handle them.
208					defined $modifier
209					or return ( sort grep { ! $self->{modifiers}{$_} }
210					keys %{ $self->{modifiers} } );
211					return exists $self->{modifiers}{$modifier}
212					&& ! $self->{modifiers}{$modifier};
213					}
214
215					sub perl_version_introduced {
216					my ( $self ) = @_;
217					return ( $self->{perl_version_introduced} \|\|=
218					$self->_perl_version_introduced() );
219					}
220
221					sub _perl_version_introduced {
222					my ( $self ) = @_;
223					my $content = $self->content();
224					my $is_statement_modifier = ( $content !~ m/ \A [(]? [?] /smx );
225					my $match_semantics = $self->match_semantics();
226
227					# Match semantics modifiers became available as regular expression
228					# modifiers in 5.13.10.
229					defined $match_semantics
230					and $is_statement_modifier
231					and return '5.013010';
232
233					# /aa was introduced in 5.13.10.
234					defined $match_semantics
235					and 'aa' eq $match_semantics
236					and return '5.013010';
237
238					# /a was introduced in 5.13.9, but only in (?...), not as modifier
239					# of the entire regular expression.
240					defined $match_semantics
241					and not $is_statement_modifier
242					and 'a' eq $match_semantics
243					and return '5.013009';
244
245					# /d, /l, and /u were introduced in 5.13.6, but only in (?...), not
246					# as modifiers of the entire regular expression.
247					defined $match_semantics
248					and not $is_statement_modifier
249					and return '5.013006';
250
251					# The '^' reassert-defaults modifier in embedded modifiers was
252					# introduced in 5.13.6.
253					not $is_statement_modifier
254					and $content =~ m/ \^ /smx
255					and return '5.013006';
256
257					$self->asserts( 'r' ) and return '5.013002';
258					$self->asserts( 'p' ) and return '5.009005';
259					$self->content() =~ m/ \A [(]? [?] .* - /smx
260					and return '5.005';
261					$self->asserts( 'c' ) and return '5.004';
262					return MINIMUM_PERL;
263					}
264
265					# Return true if the token can be quantified, and false otherwise
266					# sub can_be_quantified { return };
267
268
269					# $present => __aggregate_modifiers( 'modifiers', ... );
270					#
271					# This subroutine is private to the PPIx::Regexp package. It may change
272					# or be retracted without notice. Its purpose is to support defaulted
273					# modifiers.
274					#
275					# Aggregate the given modifiers left-to-right, returning a hash of those
276					# present and their values.
277
278					sub __aggregate_modifiers {
279					my ( @mods ) = @_;
280					my %present;
281					foreach my $content ( @mods ) {
282					$content =~ s{ [?/]+ }{}smxg;
283					if ( $content =~ m/ \A \^ /smx ) {
284					@present{ MODIFIER_GROUP_MATCH_SEMANTICS(), qw{ i s m x } }
285					= qw{ d 0 0 0 0 };
286					}
287
288					# Have to do the global match rather than a split, because the
289					# expression modifiers come through here too, and we need to
290					# distinguish between s/.../.../e and s/.../.../ee. But the
291					# modifiers can be randomized (that is, /eie is the same as
292					# /eei), so we reorder the content first.
293					$content = join '', sort split qr{}smx, $content;
294					my $value = 1;
295					while ( $content =~ m/ ( ( [[:alpha:]-] ) \2* ) /smxg ) {
296					if ( '-' eq $1 ) {
297					$value = 0;
298					} elsif ( my $bin = $aggregate{$1} ) {
299					# Yes, technically the match semantics stuff can't be
300					# negated in a regex. But it can in a 'use re', which
301					# also comes through here, so we have to handle it.
302					$present{$bin} = $value ? $1 : undef;
303					} else {
304					$present{$1} = $value;
305					}
306					}
307					}
308					return \%present;
309					}
310
311					# This must be implemented by tokens which do not recognize themselves.
312					# The return is a list of list references. Each list reference must
313					# contain a regular expression that recognizes the token, and optionally
314					# a reference to a hash to pass to make_token as the class-specific
315					# arguments. The regular expression MUST be anchored to the beginning of
316					# the string.
317					# spent 13µs (10+2) within PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize which was called: # once (10µs+2µs) by base::import at line 102 of PPIx/Regexp/Token/Structure.pm sub __PPIX_TOKEN__recognize {
318					return (
319	1	15µs	2	2µs	[ qr{ \A [(] [?] [[:lower:]]* -? [[:lower:]]* [)] }smx ], # spent 2µs making 2 calls to PPIx::Regexp::Token::Modifier::CORE:qr, avg 1µs/call
320					[ qr{ \A [(] [?] \^ [[:lower:]]* [)] }smx ],
321					);
322					}
323
324					# After the token is made, figure out what it asserts or negates.
325
326					sub __PPIX_TOKEN__post_make {
327					my ( $self, $tokenizer ) = @_;
328					defined $tokenizer
329					and $tokenizer->modifier_modify( $self->modifiers() );
330					return;
331					}
332
333					{
334
335					# Called by the tokenizer to modify the current modifiers with a new
336					# set. Both are passed as hash references, and a reference to the
337					# new hash is returned.
338	1	500ns			sub __PPIX_TOKENIZER__modifier_modify {
339					my ( @args ) = @_;
340
341					my %merged;
342					foreach my $hash ( @args ) {
343					while ( my ( $key, $val ) = each %{ $hash } ) {
344					if ( $val ) {
345					$merged{$key} = $val;
346					} else {
347					delete $merged{$key};
348					}
349					}
350					}
351
352					return \%merged;
353
354					}
355
356					# Decode modifiers from the content of the token.
357					sub _decode {
358					my ( $self ) = @_;
359					return __aggregate_modifiers( $self->content() );
360					}
361					}
362
363	1	8µs			1;
364
365					__END__

					# spent 2µs within PPIx::Regexp::Token::Modifier::CORE:qr which was called 2 times, avg 1µs/call: # 2 times (2µs+0s) by PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize at line 319, avg 1µs/call sub PPIx::Regexp::Token::Modifier::CORE:qr; # opcode