Profile of PPIx/Regexp/Token/Structure.pm

Filename	/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Structure.pm
Statements	Executed 31 statements in 1.57ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	1.60ms	1.80ms	PPIx::Regexp::Token::Structure::BEGIN@51
1	1	1	403µs	655µs	PPIx::Regexp::Token::Structure::BEGIN@54
1	1	1	19µs	39µs	PPIx::Regexp::Token::Structure::BEGIN@35
1	1	1	13µs	20µs	PPIx::Regexp::Token::Structure::BEGIN@36
1	1	1	11µs	96µs	PPIx::Regexp::Token::Structure::BEGIN@38
1	1	1	11µs	66µs	PPIx::Regexp::Token::Structure::BEGIN@40
1	1	1	7µs	7µs	PPIx::Regexp::Token::Structure::BEGIN@52
1	1	1	5µs	5µs	PPIx::Regexp::Token::Structure::BEGIN@50
1	1	1	5µs	5µs	PPIx::Regexp::Token::Structure::BEGIN@53
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::__ANON__[:152]
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::__ANON__[:220]
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::__ANON__[:245]
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::__PPIX_LEXER__finalize
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::__PPIX_TOKENIZER__regexp
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::can_be_quantified
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::is_quantifier
0	0	0	0s	0s	PPIx::Regexp::Token::Structure::perl_version_introduced

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					=head1 NAME
2
3					PPIx::Regexp::Token::Structure - Represent structural elements.
4
5					=head1 SYNOPSIS
6
7					use PPIx::Regexp::Dumper;
8					PPIx::Regexp::Dumper->new( 'qr{(foo)}smx' )
9					->print();
10
11					=head1 INHERITANCE
12
13					C<PPIx::Regexp::Token::Structure> is a
14					L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15
16					C<PPIx::Regexp::Token::Structure> is the parent of
17					L<PPIx::Regexp::Token::Delimiter|PPIx::Regexp::Token::Delimiter>.
18
19					=head1 DESCRIPTION
20
21					This class represents things that define the structure of the regular
22					expression. This typically means brackets of various sorts, but to
23					prevent proliferation of token classes the type of the regular
24					expression is stored here.
25
26					=head1 METHODS
27
28					This class provides no public methods beyond those provided by its
29					superclass.
30
31					=cut
32
33					package PPIx::Regexp::Token::Structure;
34
35	2	33µs	2	58µs	# spent 39µs (19+19) within PPIx::Regexp::Token::Structure::BEGIN@35 which was called: # once (19µs+19µs) by base::import at line 35 use strict; # spent 39µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@35 # spent 19µs making 1 call to strict::import
36	2	33µs	2	27µs	# spent 20µs (13+7) within PPIx::Regexp::Token::Structure::BEGIN@36 which was called: # once (13µs+7µs) by base::import at line 36 use warnings; # spent 20µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@36 # spent 7µs making 1 call to warnings::import
37
38	2	43µs	2	96µs	# spent 96µs (11+85) within PPIx::Regexp::Token::Structure::BEGIN@38 which was called: # once (11µs+85µs) by base::import at line 38 use base qw{ PPIx::Regexp::Token }; # spent 96µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@38 # spent 85µs making 1 call to base::import, recursion: max depth 1, sum of overlapping time 85µs
39
40	1	400ns			# spent 66µs (11+55) within PPIx::Regexp::Token::Structure::BEGIN@40 which was called: # once (11µs+55µs) by base::import at line 46 use PPIx::Regexp::Constant qw{
41					COOKIE_CLASS
42					COOKIE_QUANT
43					COOKIE_REGEX_SET
44					MINIMUM_PERL
45					TOKEN_LITERAL
46	1	34µs	2	121µs	}; # spent 66µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@40 # spent 55µs making 1 call to Exporter::import
47
48					# Tokens we are responsible for making, under at least some
49					# circumstances.
50	2	26µs	1	5µs	# spent 5µs within PPIx::Regexp::Token::Structure::BEGIN@50 which was called: # once (5µs+0s) by base::import at line 50 use PPIx::Regexp::Token::Comment (); # spent 5µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@50
51	2	161µs	1	1.80ms	# spent 1.80ms (1.60+202µs) within PPIx::Regexp::Token::Structure::BEGIN@51 which was called: # once (1.60ms+202µs) by base::import at line 51 use PPIx::Regexp::Token::Modifier (); # spent 1.80ms making 1 call to PPIx::Regexp::Token::Structure::BEGIN@51
52	2	28µs	1	7µs	# spent 7µs within PPIx::Regexp::Token::Structure::BEGIN@52 which was called: # once (7µs+0s) by base::import at line 52 use PPIx::Regexp::Token::Backreference (); # spent 7µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@52
53	2	26µs	1	5µs	# spent 5µs within PPIx::Regexp::Token::Structure::BEGIN@53 which was called: # once (5µs+0s) by base::import at line 53 use PPIx::Regexp::Token::Backtrack (); # spent 5µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@53
54	2	1.14ms	1	655µs	# spent 655µs (403+252) within PPIx::Regexp::Token::Structure::BEGIN@54 which was called: # once (403µs+252µs) by base::import at line 54 use PPIx::Regexp::Token::Recursion (); # spent 655µs making 1 call to PPIx::Regexp::Token::Structure::BEGIN@54
55
56	1	800ns			our $VERSION = '0.036';
57
58					# Return true if the token can be quantified, and false otherwise.
					# Only tokens whose content is ')' or ']' are listed as quantifiable.
					# When the invocant is not a reference (e.g. a class name) nothing is
					# returned.
59
60	1	4µs			my %quant = map { $_ => 1 } ')', ']';
61					sub can_be_quantified {
62					my ( $self ) = @_;
63					ref $self or return;
64					return $quant{ $self->content() };
65					};
66
67					sub is_quantifier {
					# Return the is_quantifier flag stored on the token object; nothing is
					# returned when the invocant is not a reference. Within this file the
					# flag is set on a '}' token that closes a quantifier and is removed
					# again by __PPIX_LEXER__finalize.
68					my ( $self ) = @_;
69					ref $self or return;
70					return $self->{is_quantifier};
71					}
72
73					{
74
75					# Note that the implementation equivocates on the ::Token::Structure
76					# class, using it both for the initial token that determines the
77					# type of the regex and things like parentheses internal to the
78					# regex. Rather than sort out this equivocation, I have relied on
79					# the currently-true assumption that 'qr' will not satisfy the
80					# ::Token::Structure recognition logic, and the only way this class
81					# can acquire this content is by the brute-force approach used to
82					# generate the initial token object.
83
84	2	3µs			my %perl_version_introduced = (
85					qr => '5.005',
86					'(?[' => '5.017008',
87					);
88
					# Return the Perl version that introduced this token's content, looked
					# up in the table above; content not listed there falls back to
					# MINIMUM_PERL.
89					sub perl_version_introduced {
90					my ( $self ) = @_;
91					return $perl_version_introduced{ $self->content() } || MINIMUM_PERL;
92					}
93					}
94
95					{
96
					# The six bracket characters this class is willing to look at.
97	2	5µs			my %delim = map { $_ => 1 } qw/ ( ) { } [ ] /;
98
99					# Regular expressions to match various parenthesized tokens, and the
100					# classes to make them into.
101
102	5	15µs	5	106µs	my @paren_token = map { # spent 53µs making 1 call to PPIx::Regexp::Token::Recursion::__PPIX_TOKEN__recognize # spent 14µs making 1 call to PPIx::Regexp::Token::Backtrack::__PPIX_TOKEN__recognize # spent 13µs making 1 call to PPIx::Regexp::Token::Comment::__PPIX_TOKEN__recognize # spent 13µs making 1 call to PPIx::Regexp::Token::Backreference::__PPIX_TOKEN__recognize # spent 13µs making 1 call to PPIx::Regexp::Token::Modifier::__PPIX_TOKEN__recognize
103	1	5µs			[ $_ => $_->__PPIX_TOKEN__recognize() ]
104					}
105					'PPIx::Regexp::Token::Comment',
106					'PPIx::Regexp::Token::Modifier',
107					'PPIx::Regexp::Token::Backreference',
108					'PPIx::Regexp::Token::Backtrack',
109					'PPIx::Regexp::Token::Recursion',
110					;
111
					# Tokenizer hook. Arguments are the class, the tokenizer object and the
					# current character. Returns nothing when the character is not one of
					# the delimiters in %delim. Otherwise it returns either token object(s)
					# built with make_token(), or the literal 1 / the value handed back by
					# find_regexp() -- presumably the number of characters to accept;
					# NOTE(review): confirm against the tokenizer's contract.
					# Along the way it sets and clears the COOKIE_CLASS, COOKIE_QUANT and
					# COOKIE_REGEX_SET cookies and pushes/pops modifier scopes, so the
					# order of the branches below matters.
112					sub __PPIX_TOKENIZER__regexp {
113					my ( $class, $tokenizer, $character ) = @_;
114
115					# We are not interested in anything but delimiters.
116					$delim{$character} or return;
117
118					# Inside a character class, all the delimiters are normal characters
119					# except for the close square bracket.
120					if ( $tokenizer->cookie( COOKIE_CLASS ) ) {
121					$character eq ']'
122					or return $tokenizer->make_token( 1, TOKEN_LITERAL );
123					}
124
125					# Open parentheses have various interesting possibilities ...
126					if ( $character eq '(' ) {
127
128					# Sometimes the whole bunch of parenthesized characters seems
129					# naturally to be a token.
130					foreach ( @paren_token ) {
131					my ( $class, @recognize ) = @{ $_ };
132					foreach ( @recognize ) {
133					my ( $regexp, $arg ) = @{ $_ };
134					my $accept = $tokenizer->find_regexp( $regexp ) or next;
135					return $tokenizer->make_token( $accept, $class, $arg );
136					}
137					}
138
139					# Modifier changes are local to this parenthesis group
140					$tokenizer->modifier_duplicate();
141
142					# The regex-set functionality introduced with 5.17.8 is most
143					# conveniently handled by treating the initial '(?[' and
144					# final '])' as ::Structure tokens. Fortunately for us,
145					# perl5178delta documents that these may not have interior
146					# spaces.
147
148					if ( my $accept = $tokenizer->find_regexp(
149					qr{ \A [(] [?] [[] }smx # ] ) - help for vim
150					)
151					) {
152					$tokenizer->cookie( COOKIE_REGEX_SET, sub { return 1 } );
153					$tokenizer->modifier_modify( x => 1 ); # Implicitly /x
154					return $accept;
155					}
156
157					# We expect certain tokens only after a left paren.
158					$tokenizer->expect(
159					'PPIx::Regexp::Token::GroupType::Modifier',
160					'PPIx::Regexp::Token::GroupType::NamedCapture',
161					'PPIx::Regexp::Token::GroupType::Assertion',
162					'PPIx::Regexp::Token::GroupType::Code',
163					'PPIx::Regexp::Token::GroupType::BranchReset',
164					'PPIx::Regexp::Token::GroupType::Subexpression',
165					'PPIx::Regexp::Token::GroupType::Switch',
166					);
167
168					# Accept the parenthesis.
169					return 1;
170					}
171
172					# Close parentheses end modifier localization
173					if ( $character eq ')' ) {
174					$tokenizer->modifier_pop();
175					return 1;
176					}
177
178					# Open curlys are complicated because they may or may not represent
179					# the beginning of a quantifier, depending on what comes before the
180					# close curly. So we set a cookie to monitor the token stream for
181					# interlopers. If all goes well, the right curly will find the
182					# cookie and know it is supposed to be a quantifier.
183					if ( $character eq '{' ) {
184
185					# If the prior token can not be quantified, all this is
186					# unnecessary.
187					$tokenizer->prior( 'can_be_quantified' )
188					or return 1;
189
190					# We make our token now, before setting the cookie. Otherwise
191					# the cookie has to deal with this token.
192					my $token = $tokenizer->make_token( 1 );
193
194					# A cookie for the next '}'.
195					my $commas = 0;
196					$tokenizer->cookie( COOKIE_QUANT, sub {
197					my ( $tokenizer, $token ) = @_;
198					$token or return 1;
199
200					# Of literals, we accept exactly one comma provided it
201					# is not immediately after a '{'. We also accept
202					# anything that matches '\d';
203					if ( $token->isa( TOKEN_LITERAL ) ) {
204					my $character = $token->content();
205					if ( $character eq ',' ) {
206					$commas++ and return;
207					return $tokenizer->prior( 'content' ) ne '{';
208					}
209					return $character =~ m/ \A \d \z /smx;
210					}
211
212					# Since we do not know what is in an interpolation, we
213					# trustingly accept it.
214					if ( $token->isa( 'PPIx::Regexp::Token::Interpolation' )
215					) {
216					return 1;
217					}
218
219					return;
220					},
221					);
222
223					return $token;
224					}
225
226					# The close curly bracket is a little complicated because if the
227					# cookie posted by the left curly bracket is still around, we are a
228					# quantifier, otherwise not.
229					if ( $character eq '}' ) {
230					$tokenizer->cookie( COOKIE_QUANT, undef )
231					or return 1;
					# If the prior token's class is this class, the '}' is accepted as
					# an ordinary structure token, not a quantifier -- presumably the
					# empty '{}' case, where the prior token is the '{' itself.
					# NOTE(review): confirm.
232					$tokenizer->prior( 'class' )->isa( __PACKAGE__ )
233					and return 1;
234					my $token = $tokenizer->make_token( 1 );
235					$token->{is_quantifier} = 1;
236					return $token;
237					}
238
239					# The parse rules are different inside a character class, so we set
240					# another cookie. Sigh. If your tool is a hammer ...
241					if ( $character eq '[' ) {
242
243					# Set our cookie. Since it always returns 1, it does not matter
244					# where in the following mess we set it.
245					$tokenizer->cookie( COOKIE_CLASS, sub { return 1 } );
246
247					# Make our token now, since the easiest place to deal with the
248					# beginning-of-character-class strangeness seems to be right
249					# here.
250					my @tokens = $tokenizer->make_token( 1 );
251
252					# Get the next character, returning tokens if there is none.
253					defined ( $character = $tokenizer->peek() )
254					or return @tokens;
255
256					# If we have a caret, it is a negation operator. Make its token
257					# and fetch the next character, returning if none.
258					if ( $character eq '^' ) {
259					push @tokens, $tokenizer->make_token(
260					1, 'PPIx::Regexp::Token::Operator' );
261					defined ( $character = $tokenizer->peek() )
262					or return @tokens;
263					}
264
265					# If we have a close square at this point, it is not the end of
266					# the class, but just a literal. Make its token.
267					$character eq ']'
268					and push @tokens, $tokenizer->make_token( 1, TOKEN_LITERAL );
269
270					# Return all tokens made.
271					return @tokens;
272					}
273					# per perlop, the metas inside a [] are -]\^$.
274					# per perlop, the metas outside a [] are {}[]()^$.|*+?\
275					# The difference is that {}[().|*+? are not metas in [], but - is.
276
277					# Close bracket is complicated by the addition of regex sets.
278					# And more complicated by the fact that you can have an
279					# old-style character class inside a regex set. Fortunately they
280					# have not (yet!) permitted nested regex sets.
281					if ( $character eq ']' ) {
282
283					# If we find '])' and COOKIE_REGEX_SET is present, we have a
284					# regex set. We need to delete the cookie and accept both
285					# characters.
286					if ( ( my $accept = $tokenizer->find_regexp(
287					# help vim - ( [
288					qr{ \A []] [)] }smx
289					) )
290					&& $tokenizer->cookie( COOKIE_REGEX_SET )
291
292					) {
293					$tokenizer->cookie( COOKIE_REGEX_SET, undef );
294					return $accept;
295					}
296
297					# Otherwise we assume we're in a bracketed character class,
298					# delete the cookie, and accept the close bracket.
299					$tokenizer->cookie( COOKIE_CLASS, undef );
300					return 1;
301					}
302
303					return 1;
304					}
305
306					}
307
308					# Called by the lexer once it has done its worst to all the tokens.
309					# Called as a method with no arguments. The return is the number of
310					# parse failures discovered when finalizing.
					# This implementation only discards the is_quantifier flag stored on
					# the token and always reports zero failures.
311					sub __PPIX_LEXER__finalize {
312					my ( $self ) = @_;
313					delete $self->{is_quantifier};
314					return 0;
315					}
316
317	1	10µs			1;
318
319					__END__