← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:14 2016

Filename/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/Literal.pm
StatementsExecuted 22 statements in 1.62ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11114µs29µsPPIx::Regexp::Token::Literal::::BEGIN@32PPIx::Regexp::Token::Literal::BEGIN@32
1119µs43µsPPIx::Regexp::Token::Literal::::BEGIN@37PPIx::Regexp::Token::Literal::BEGIN@37
1118µs14µsPPIx::Regexp::Token::Literal::::BEGIN@33PPIx::Regexp::Token::Literal::BEGIN@33
1118µs72µsPPIx::Regexp::Token::Literal::::BEGIN@35PPIx::Regexp::Token::Literal::BEGIN@35
2224µs4µsPPIx::Regexp::Token::Literal::::CORE:qrPPIx::Regexp::Token::Literal::CORE:qr (opcode)
111900ns900nsPPIx::Regexp::Token::Literal::::CORE:regcompPPIx::Regexp::Token::Literal::CORE:regcomp (opcode)
0000s0sPPIx::Regexp::Token::Literal::::__PPIX_TOKENIZER__regexpPPIx::Regexp::Token::Literal::__PPIX_TOKENIZER__regexp
0000s0sPPIx::Regexp::Token::Literal::::_escapedPPIx::Regexp::Token::Literal::_escaped
0000s0sPPIx::Regexp::Token::Literal::::_have_charnames_vianamePPIx::Regexp::Token::Literal::_have_charnames_vianame
0000s0sPPIx::Regexp::Token::Literal::::_ordinalPPIx::Regexp::Token::Literal::_ordinal
0000s0sPPIx::Regexp::Token::Literal::::ordinalPPIx::Regexp::Token::Literal::ordinal
0000s0sPPIx::Regexp::Token::Literal::::perl_version_introducedPPIx::Regexp::Token::Literal::perl_version_introduced
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1=head1 NAME
2
3PPIx::Regexp::Token::Literal - Represent a literal character
4
5=head1 SYNOPSIS
6
7 use PPIx::Regexp::Dumper;
8 PPIx::Regexp::Dumper->new( 'qr{foo}smx' )
9 ->print();
10
11=head1 INHERITANCE
12
13C<PPIx::Regexp::Token::Literal> is a
14L<PPIx::Regexp::Token|PPIx::Regexp::Token>.
15
16C<PPIx::Regexp::Token::Literal> has no descendants.
17
18=head1 DESCRIPTION
19
20This class represents a literal character, no matter how specified.
21
22=head1 METHODS
23
24This class provides the following public methods. Methods not documented
25here are private, and unsupported in the sense that the author reserves
26the right to change or remove them without notice.
27
28=cut
29
30package PPIx::Regexp::Token::Literal;
31
32225µs243µs
# spent 29µs (14+14) within PPIx::Regexp::Token::Literal::BEGIN@32 which was called: # once (14µs+14µs) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 32
use strict;
# spent 29µs making 1 call to PPIx::Regexp::Token::Literal::BEGIN@32 # spent 14µs making 1 call to strict::import
33223µs220µs
# spent 14µs (8+6) within PPIx::Regexp::Token::Literal::BEGIN@33 which was called: # once (8µs+6µs) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 33
use warnings;
# spent 14µs making 1 call to PPIx::Regexp::Token::Literal::BEGIN@33 # spent 6µs making 1 call to warnings::import
34
35249µs2136µs
# spent 72µs (8+64) within PPIx::Regexp::Token::Literal::BEGIN@35 which was called: # once (8µs+64µs) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 35
use base qw{ PPIx::Regexp::Token };
# spent 72µs making 1 call to PPIx::Regexp::Token::Literal::BEGIN@35 # spent 64µs making 1 call to base::import
36
371300ns
# spent 43µs (9+34) within PPIx::Regexp::Token::Literal::BEGIN@37 which was called: # once (9µs+34µs) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 39
use PPIx::Regexp::Constant qw{
38 COOKIE_CLASS COOKIE_REGEX_SET MINIMUM_PERL TOKEN_UNKNOWN
3911.39ms278µs};
# spent 43µs making 1 call to PPIx::Regexp::Token::Literal::BEGIN@37 # spent 34µs making 1 call to Exporter::import
40
411700nsour $VERSION = '0.036';
42
43# Return true if the token can be quantified, and false otherwise
44# sub can_be_quantified { return };
45
46sub perl_version_introduced {
47 my ( $self ) = @_;
48 exists $self->{perl_version_introduced}
49 and return $self->{perl_version_introduced};
50 ( my $content = $self->content() ) =~ m/ \A \\ o /smx
51 and return ( $self->{perl_version_introduced} = '5.013003' );
52 $content =~ m/ \A \\ N [{] U [+] /smx
53 and return ( $self->{perl_version_introduced} = '5.008' );
54 $content =~ m/ \A \\ x [{] /smx # }
55 and return ( $self->{perl_version_introduced} = '5.006' );
56 $content =~ m/ \A \\ N /smx
57 and return ( $self->{perl_version_introduced} = '5.006001' );
58 return ( $self->{perl_version_introduced} = MINIMUM_PERL );
59}
60
61# Some characters may or may not be literals depending on whether we are
62# inside a character class. The following hash identifies those
63# characters and says what we should return when outside (index 0) or
64# inside (index 1) a character class, as judged by the presence of the
65# relevant cookie.
6615µsmy %double_agent = (
67 '.' => [ undef, 1 ],
68 '*' => [ undef, 1 ],
69 '?' => [ undef, 1 ],
70 '+' => [ undef, 1 ],
71 '-' => [ 1, undef ],
72 '|' => [ undef, 1 ],
73);
74
75# These are the characters that other external tokenizers need to see,
76# or at least that we need to take a closer look at. All others can be
77# unconditionally made into single-character literals.
78131µs23µsmy %extra_ordinary = map { $_ => 1 }
# spent 2µs making 1 call to PPIx::Regexp::Token::Literal::CORE:qr # spent 900ns making 1 call to PPIx::Regexp::Token::Literal::CORE:regcomp
79 split qr{}smx, '$@*+?.\\(){}[]^|-#';
80# $ -> Token::Interpolation, Token::Assertion
81# @ -> Token::Interpolation
82# * -> Token::Quantifier
83# + ? -> Token::Quantifier, Token::Greediness
84# . -> Token::CharClass::Simple
85# \ -> Token::Control, Token::CharClass::Simple, Token::Assertion,
86# Token::Backreference
87# ( ) { } [ ] -> Token::Structure
88# ^ -> Token::Assertion
89# | - -> Token::Operator
90
9114µsmy %regex_set_operator = map { $_ => 1 } qw{ & + | - ^ ! };
92
93# The regex for the extended white space available under regex sets in
94# Perl 5.17.8 and in general in perl 5.17.9. I have been unable to get
95# this to work under Perl 5.6.2, so for that we fall back to ASCII white
96# space. The stringy eval is because I have been unable to get
97# satisfaction out of either interpolated characters (in general) or
98# eval-ed "\N{U+...}" (under 5.6.2) or \x{...} (ditto).
99#
100# See PPIx::Regexp::Structure::RegexSet for the documentation of this
101# mess.
102# my $white_space_re = $] >= 5.008 ?
103# 'qr< \\A [\\s\\N{U+0085}\\N{U+200E}\\N{U+200F}\\N{U+2028}\\N{U+2029}]+ >smx' :
104# 'qr< \\A \\s+ >smx';
105#
106# RT #91798
107# The above turns out to be wrong, because \s matches too many
108# characters. We need the following to get the right match. Note that
109# \cK was added experimentally in 5.17.0 and made it into 5.18. The \N{}
110# characters were NOT added (as I originally thought) but were simply
111# made characters that generated warnings when escaped, in preparation
112# for adding them. When they actually get added, I will have to add back
113# the trinary operator. Sigh.
1141400nsmy $white_space_re = 'qr< \A [\t\n\cK\f\r ] >smx';
115124µs$white_space_re = eval $white_space_re; ## no critic (ProhibitStringyEval)
# spent 8µs executing statements in string eval
116
11714µsmy %regex_pass_on = map { $_ => 1 } qw{ [ ] ( ) $ \ };
118
119sub __PPIX_TOKENIZER__regexp {
120 my ( $class, $tokenizer, $character, $char_type ) = @_;
121
122 if ( $tokenizer->cookie( COOKIE_REGEX_SET ) ) {
123 # If we're inside a regex set no literals are allowed, but not
124 # all characters that get here are seen as literals.
125
126 $regex_set_operator{$character}
127 and return $tokenizer->make_token(
128 length $character, 'PPIx::Regexp::Token::Operator' );
129
130 my $accept;
131
132 $accept = $tokenizer->find_regexp( $white_space_re )
133 and return $tokenizer->make_token(
134 $accept, 'PPIx::Regexp::Token::Whitespace' );
135
136 $accept = _escaped( $tokenizer, $character )
137 and return $accept;
138
139 $regex_pass_on{$character}
140 and return;
141
142 # At this point we have a single character which is poised to be
143 # interpreted as a literal. These are not legal in a regex set
144 # except when also in a bracketed class.
145 return $tokenizer->cookie( COOKIE_CLASS ) ?
146 length $character :
147 $tokenizer->make_token(
148 length $character, TOKEN_UNKNOWN, {
149 error => 'Literal not valid in Regex set',
150 },
151 );
152
153 } else {
154
155 # Otherwise handle the characters that may or may not be
156 # literals depending on whether or not we are in a character
157 # class.
158 if ( my $class = $double_agent{$character} ) {
159 my $inx = $tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0;
160 return $class->[$inx];
161 }
162 }
163
164 # If /x is in effect _and_ we are not inside a character class, \s
165 # is whitespace, and '#' introduces a comment. Otherwise they are
166 # both literals.
167 if ( $tokenizer->modifier( 'x' ) &&
168 ! $tokenizer->cookie( COOKIE_CLASS ) ) {
169 my $accept;
170 $accept = $tokenizer->find_regexp( $white_space_re )
171 and return $tokenizer->make_token(
172 $accept, 'PPIx::Regexp::Token::Whitespace' );
173 $accept = $tokenizer->find_regexp(
174 qr{ \A \# [^\n]* (?: \n | \z) }smx )
175 and return $tokenizer->make_token(
176 $accept, 'PPIx::Regexp::Token::Comment' );
177 } else {
178 ( $character eq '#' || $character =~ m/ \A \s \z /smx )
179 and return 1;
180 }
181
182 my $accept;
183 $accept = _escaped( $tokenizer, $character )
184 and return $accept;
185
186 # All other characters which are not extra ordinary get accepted.
187 $extra_ordinary{$character} or return 1;
188
189 return;
190}
191
192
193=begin comment
194
195The following is from perlop:
196
197 The character following "\c" is mapped to some other character by
198 converting letters to upper case and then (on ASCII systems) by
199 inverting the 7th bit (0x40). The most interesting range is from '@' to
200 '_' (0x40 through 0x5F), resulting in a control character from 0x00
201 through 0x1F. A '?' maps to the DEL character. On EBCDIC systems only
202 '@', the letters, '[', '\', ']', '^', '_' and '?' will work, resulting
203 in 0x00 through 0x1F and 0x7F.
204
205=end comment
206
207=cut
208
209# Recognize all the escaped constructions that generate literal
210# characters in one gigantic regexp. Technically \1.. through \7.. are
211# octal literals too, but we can not disambiguate these from back
212# references until we know how many there are. So the lexer gets another
213# dirty job.
214sub _escaped {
215 my ( $tokenizer, $character ) = @_;
216
217 $character eq '\\'
218 or return;
219
220 if ( my $accept = $tokenizer->find_regexp(
221 qr< \A \\ (?:
222 [^\w\s] | # delimiters/metas
223 [tnrfae] | # C-style escapes
224 0 [01234567]{0,2} | # octal
225# [01234567]{1,3} | # made from backref by lexer
226 c [][[:alpha:]\@\\^_?] | # control characters
227 x (?: \{ [[:xdigit:]]* \} | [[:xdigit:]]{0,2} ) | # hex
228 o [{] [01234567]+ [}] | # octal as of 5.13.3
229## N (?: \{ (?: [[:alpha:]] [\w\s:()-]* | # must begin w/ alpha
230## U [+] [[:xdigit:]]+ ) \} ) | # unicode
231 N (?: [{] (?= \D ) [^\}]+ [}] ) # unicode
232 ) >smx ) ) {
233 return $accept;
234 }
235 return;
236}
237
238=head2 ordinal
239
240 print 'The ordinal of ', $token->content(),
241 ' is ', $token->ordinal(), "\n";
242
243This method returns the ordinal of the literal if it can figure it out.
244It is analogous to the C<ord> built-in.
245
246It will not attempt to determine the ordinal of a unicode name
247(C<\N{...}>) unless L<charnames|charnames> has been loaded, and supports
248the L<vianame()|charnames/vianame> function. Instead, it will return
249C<undef>. Users of Perl 5.6.2 and older may be out of luck here.
250
251Unicode code points (e.g. C<\N{U+abcd}>) should work independently of
252L<charnames|charnames>, and just return the value of C<abcd>.
253
254It will never attempt to return the ordinal of an octet (C<\C{...}>)
255because I don't understand the syntax.
256
257=cut
258
259{
260
261232µs my %escapes = (
262 '\\t' => ord "\t",
263 '\\n' => ord "\n",
264 '\\r' => ord "\r",
265 '\\f' => ord "\f",
266 '\\a' => ord "\a",
267 '\\b' => ord "\b",
268 '\\e' => ord "\e",
269 '\\c?' => ord "\c?",
270 '\\c@' => ord "\c@",
271 '\\cA' => ord "\cA",
272 '\\ca' => ord "\cA",
273 '\\cB' => ord "\cB",
274 '\\cb' => ord "\cB",
275 '\\cC' => ord "\cC",
276 '\\cc' => ord "\cC",
277 '\\cD' => ord "\cD",
278 '\\cd' => ord "\cD",
279 '\\cE' => ord "\cE",
280 '\\ce' => ord "\cE",
281 '\\cF' => ord "\cF",
282 '\\cf' => ord "\cF",
283 '\\cG' => ord "\cG",
284 '\\cg' => ord "\cG",
285 '\\cH' => ord "\cH",
286 '\\ch' => ord "\cH",
287 '\\cI' => ord "\cI",
288 '\\ci' => ord "\cI",
289 '\\cJ' => ord "\cJ",
290 '\\cj' => ord "\cJ",
291 '\\cK' => ord "\cK",
292 '\\ck' => ord "\cK",
293 '\\cL' => ord "\cL",
294 '\\cl' => ord "\cL",
295 '\\cM' => ord "\cM",
296 '\\cm' => ord "\cM",
297 '\\cN' => ord "\cN",
298 '\\cn' => ord "\cN",
299 '\\cO' => ord "\cO",
300 '\\co' => ord "\cO",
301 '\\cP' => ord "\cP",
302 '\\cp' => ord "\cP",
303 '\\cQ' => ord "\cQ",
304 '\\cq' => ord "\cQ",
305 '\\cR' => ord "\cR",
306 '\\cr' => ord "\cR",
307 '\\cS' => ord "\cS",
308 '\\cs' => ord "\cS",
309 '\\cT' => ord "\cT",
310 '\\ct' => ord "\cT",
311 '\\cU' => ord "\cU",
312 '\\cu' => ord "\cU",
313 '\\cV' => ord "\cV",
314 '\\cv' => ord "\cV",
315 '\\cW' => ord "\cW",
316 '\\cw' => ord "\cW",
317 '\\cX' => ord "\cX",
318 '\\cx' => ord "\cX",
319 '\\cY' => ord "\cY",
320 '\\cy' => ord "\cY",
321 '\\cZ' => ord "\cZ",
322 '\\cz' => ord "\cZ",
323 '\\c[' => ord "\c[",
324 '\\c\\\\' => ord "\c\\", # " # Get Vim's head straight.
325 '\\c]' => ord "\c]",
326 '\\c^' => ord "\c^",
327 '\\c_' => ord "\c_",
328 );
329
330 sub ordinal {
331 my ( $self ) = @_;
332 exists $self->{ordinal} and return $self->{ordinal};
333 return ( $self->{ordinal} = $self->_ordinal() );
334 }
335
33618µs my %octal = map {; "$_" => 1 } ( 0 .. 7 );
337
338 sub _ordinal {
339 my ( $self ) = @_;
340 my $content = $self->content();
341
342 $content =~ m/ \A \\ /smx or return ord $content;
343
344 exists $escapes{$content} and return $escapes{$content};
345
346 my $indicator = substr $content, 1, 1;
347
348 $octal{$indicator} and return oct substr $content, 1;
349
350 if ( $indicator eq 'x' ) {
351 $content =~ m/ \A \\ x \{ ( [[:xdigit:]]+ ) \} \z /smx
352 and return hex $1;
353 $content =~ m/ \A \\ x ( [[:xdigit:]]{0,2} ) \z /smx
354 and return hex $1;
355 return;
356 }
357
358 if ( $indicator eq 'o' ) {
359 $content =~ m/ \A \\ o [{] ( [01234567]+ ) [}] \z /smx
360 and return oct $1;
361 return; # Shouldn't happen, but ...
362 }
363
364 if ( $indicator eq 'N' ) {
365 $content =~ m/ \A \\ N \{ U [+] ( [[:xdigit:]]+ ) \} \z /smx
366 and return hex $1;
367 $content =~ m/ \A \\ N [{] ( .+ ) [}] \z /smx
368 and return (
369 _have_charnames_vianame() ?
370 charnames::vianame( $1 ) :
371 undef
372 );
373 return; # Shouldn't happen, but ...
374 }
375
376 return ord $indicator;
377 }
378
379}
380
381{
3822400ns my $have_charnames_vianame;
383
384 sub _have_charnames_vianame {
385 defined $have_charnames_vianame
386 and return $have_charnames_vianame;
387 return (
388 $have_charnames_vianame =
389 charnames->can( 'vianame' ) ? 1 : 0
390 );
391
392 }
393}
394
395
39611µs*__PPIX_TOKENIZER__repl = \&__PPIX_TOKENIZER__regexp;
397
398122µs1;
399
400__END__
 
# spent 4µs within PPIx::Regexp::Token::Literal::CORE:qr which was called 2 times, avg 2µs/call: # once (2µs+0s) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 78 # once (2µs+0s) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 1 of (eval 442)[PPIx/Regexp/Token/Literal.pm:115]
sub PPIx::Regexp::Token::Literal::CORE:qr; # opcode
# spent 900ns within PPIx::Regexp::Token::Literal::CORE:regcomp which was called: # once (900ns+0s) by PPIx::Regexp::Tokenizer::BEGIN@34 at line 78
sub PPIx::Regexp::Token::Literal::CORE:regcomp; # opcode