← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:13 2016

Filename/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Tokenizer.pm
StatementsExecuted 77 statements in 5.34ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1111.69ms1.85msPPIx::Regexp::Tokenizer::::BEGIN@34PPIx::Regexp::Tokenizer::BEGIN@34
1111.20ms1.39msPPIx::Regexp::Tokenizer::::BEGIN@33PPIx::Regexp::Tokenizer::BEGIN@33
111618µs747µsPPIx::Regexp::Tokenizer::::BEGIN@23PPIx::Regexp::Tokenizer::BEGIN@23
111600µs925µsPPIx::Regexp::Tokenizer::::BEGIN@30PPIx::Regexp::Tokenizer::BEGIN@30
111598µs728µsPPIx::Regexp::Tokenizer::::BEGIN@19PPIx::Regexp::Tokenizer::BEGIN@19
111595µs1.55msPPIx::Regexp::Tokenizer::::BEGIN@15PPIx::Regexp::Tokenizer::BEGIN@15
111471µs690µsPPIx::Regexp::Tokenizer::::BEGIN@29PPIx::Regexp::Tokenizer::BEGIN@29
111440µs589µsPPIx::Regexp::Tokenizer::::BEGIN@20PPIx::Regexp::Tokenizer::BEGIN@20
111377µs543µsPPIx::Regexp::Tokenizer::::BEGIN@28PPIx::Regexp::Tokenizer::BEGIN@28
111362µs519µsPPIx::Regexp::Tokenizer::::BEGIN@22PPIx::Regexp::Tokenizer::BEGIN@22
111358µs524µsPPIx::Regexp::Tokenizer::::BEGIN@25PPIx::Regexp::Tokenizer::BEGIN@25
111327µs504µsPPIx::Regexp::Tokenizer::::BEGIN@36PPIx::Regexp::Tokenizer::BEGIN@36
111317µs451µsPPIx::Regexp::Tokenizer::::BEGIN@14PPIx::Regexp::Tokenizer::BEGIN@14
111301µs722µsPPIx::Regexp::Tokenizer::::BEGIN@17PPIx::Regexp::Tokenizer::BEGIN@17
111285µs1.16msPPIx::Regexp::Tokenizer::::BEGIN@26PPIx::Regexp::Tokenizer::BEGIN@26
111282µs414µsPPIx::Regexp::Tokenizer::::BEGIN@32PPIx::Regexp::Tokenizer::BEGIN@32
111266µs375µsPPIx::Regexp::Tokenizer::::BEGIN@37PPIx::Regexp::Tokenizer::BEGIN@37
111248µs386µsPPIx::Regexp::Tokenizer::::BEGIN@40PPIx::Regexp::Tokenizer::BEGIN@40
111246µs376µsPPIx::Regexp::Tokenizer::::BEGIN@41PPIx::Regexp::Tokenizer::BEGIN@41
111237µs398µsPPIx::Regexp::Tokenizer::::BEGIN@18PPIx::Regexp::Tokenizer::BEGIN@18
111234µs334µsPPIx::Regexp::Tokenizer::::BEGIN@16PPIx::Regexp::Tokenizer::BEGIN@16
111217µs354µsPPIx::Regexp::Tokenizer::::BEGIN@31PPIx::Regexp::Tokenizer::BEGIN@31
111206µs4.54msPPIx::Regexp::Tokenizer::::BEGIN@24PPIx::Regexp::Tokenizer::BEGIN@24
111206µs338µsPPIx::Regexp::Tokenizer::::BEGIN@27PPIx::Regexp::Tokenizer::BEGIN@27
111201µs297µsPPIx::Regexp::Tokenizer::::BEGIN@21PPIx::Regexp::Tokenizer::BEGIN@21
11112µs23µsPPIx::Regexp::Tokenizer::::BEGIN@3PPIx::Regexp::Tokenizer::BEGIN@3
1118µs28µsPPIx::Regexp::Tokenizer::::BEGIN@42PPIx::Regexp::Tokenizer::BEGIN@42
1118µs32µsPPIx::Regexp::Tokenizer::::BEGIN@9PPIx::Regexp::Tokenizer::BEGIN@9
1118µs30µsPPIx::Regexp::Tokenizer::::BEGIN@8PPIx::Regexp::Tokenizer::BEGIN@8
1117µs27µsPPIx::Regexp::Tokenizer::::BEGIN@43PPIx::Regexp::Tokenizer::BEGIN@43
1117µs12µsPPIx::Regexp::Tokenizer::::BEGIN@4PPIx::Regexp::Tokenizer::BEGIN@4
1117µs53µsPPIx::Regexp::Tokenizer::::BEGIN@6PPIx::Regexp::Tokenizer::BEGIN@6
1115µs5µsPPIx::Regexp::Tokenizer::::BEGIN@35PPIx::Regexp::Tokenizer::BEGIN@35
1114µs4µsPPIx::Regexp::Tokenizer::::BEGIN@38PPIx::Regexp::Tokenizer::BEGIN@38
1114µs4µsPPIx::Regexp::Tokenizer::::BEGIN@39PPIx::Regexp::Tokenizer::BEGIN@39
0000s0sPPIx::Regexp::Tokenizer::::__PPIX_TOKENIZER__finishPPIx::Regexp::Tokenizer::__PPIX_TOKENIZER__finish
0000s0sPPIx::Regexp::Tokenizer::::__PPIX_TOKENIZER__initPPIx::Regexp::Tokenizer::__PPIX_TOKENIZER__init
0000s0sPPIx::Regexp::Tokenizer::::__PPIX_TOKENIZER__regexpPPIx::Regexp::Tokenizer::__PPIX_TOKENIZER__regexp
0000s0sPPIx::Regexp::Tokenizer::::__PPIX_TOKEN_FALLBACK__regexpPPIx::Regexp::Tokenizer::__PPIX_TOKEN_FALLBACK__regexp
0000s0sPPIx::Regexp::Tokenizer::::__PPIX_TOKEN_FALLBACK__replPPIx::Regexp::Tokenizer::__PPIX_TOKEN_FALLBACK__repl
0000s0sPPIx::Regexp::Tokenizer::::__effective_modifiersPPIx::Regexp::Tokenizer::__effective_modifiers
0000s0sPPIx::Regexp::Tokenizer::::_known_tokenizer_checkPPIx::Regexp::Tokenizer::_known_tokenizer_check
0000s0sPPIx::Regexp::Tokenizer::::_known_tokenizersPPIx::Regexp::Tokenizer::_known_tokenizers
0000s0sPPIx::Regexp::Tokenizer::::_remainderPPIx::Regexp::Tokenizer::_remainder
0000s0sPPIx::Regexp::Tokenizer::::capturePPIx::Regexp::Tokenizer::capture
0000s0sPPIx::Regexp::Tokenizer::::contentPPIx::Regexp::Tokenizer::content
0000s0sPPIx::Regexp::Tokenizer::::cookiePPIx::Regexp::Tokenizer::cookie
0000s0sPPIx::Regexp::Tokenizer::::default_modifiersPPIx::Regexp::Tokenizer::default_modifiers
0000s0sPPIx::Regexp::Tokenizer::::encodingPPIx::Regexp::Tokenizer::encoding
0000s0sPPIx::Regexp::Tokenizer::::errstrPPIx::Regexp::Tokenizer::errstr
0000s0sPPIx::Regexp::Tokenizer::::expectPPIx::Regexp::Tokenizer::expect
0000s0sPPIx::Regexp::Tokenizer::::failuresPPIx::Regexp::Tokenizer::failures
0000s0sPPIx::Regexp::Tokenizer::::find_matching_delimiterPPIx::Regexp::Tokenizer::find_matching_delimiter
0000s0sPPIx::Regexp::Tokenizer::::find_regexpPPIx::Regexp::Tokenizer::find_regexp
0000s0sPPIx::Regexp::Tokenizer::::get_start_delimiterPPIx::Regexp::Tokenizer::get_start_delimiter
0000s0sPPIx::Regexp::Tokenizer::::get_tokenPPIx::Regexp::Tokenizer::get_token
0000s0sPPIx::Regexp::Tokenizer::::interpolatesPPIx::Regexp::Tokenizer::interpolates
0000s0sPPIx::Regexp::Tokenizer::::make_tokenPPIx::Regexp::Tokenizer::make_token
0000s0sPPIx::Regexp::Tokenizer::::matchPPIx::Regexp::Tokenizer::match
0000s0sPPIx::Regexp::Tokenizer::::modifierPPIx::Regexp::Tokenizer::modifier
0000s0sPPIx::Regexp::Tokenizer::::modifier_duplicatePPIx::Regexp::Tokenizer::modifier_duplicate
0000s0sPPIx::Regexp::Tokenizer::::modifier_modifyPPIx::Regexp::Tokenizer::modifier_modify
0000s0sPPIx::Regexp::Tokenizer::::modifier_popPPIx::Regexp::Tokenizer::modifier_pop
0000s0sPPIx::Regexp::Tokenizer::::newPPIx::Regexp::Tokenizer::new
0000s0sPPIx::Regexp::Tokenizer::::next_tokenPPIx::Regexp::Tokenizer::next_token
0000s0sPPIx::Regexp::Tokenizer::::peekPPIx::Regexp::Tokenizer::peek
0000s0sPPIx::Regexp::Tokenizer::::ppi_documentPPIx::Regexp::Tokenizer::ppi_document
0000s0sPPIx::Regexp::Tokenizer::::priorPPIx::Regexp::Tokenizer::prior
0000s0sPPIx::Regexp::Tokenizer::::significantPPIx::Regexp::Tokenizer::significant
0000s0sPPIx::Regexp::Tokenizer::::tokensPPIx::Regexp::Tokenizer::tokens
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package PPIx::Regexp::Tokenizer;
2
3219µs235µs
# spent 23µs (12+12) within PPIx::Regexp::Tokenizer::BEGIN@3 which was called: # once (12µs+12µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 3
use strict;
# spent 23µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@3 # spent 12µs making 1 call to strict::import
4220µs216µs
# spent 12µs (7+4) within PPIx::Regexp::Tokenizer::BEGIN@4 which was called: # once (7µs+4µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 4
use warnings;
# spent 12µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@4 # spent 4µs making 1 call to warnings::import
5
6226µs299µs
# spent 53µs (7+46) within PPIx::Regexp::Tokenizer::BEGIN@6 which was called: # once (7µs+46µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 6
use base qw{ PPIx::Regexp::Support };
# spent 53µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@6 # spent 46µs making 1 call to base::import
7
8227µs252µs
# spent 30µs (8+22) within PPIx::Regexp::Tokenizer::BEGIN@8 which was called: # once (8µs+22µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 8
use Carp qw{ confess };
# spent 30µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@8 # spent 22µs making 1 call to Exporter::import
91300ns
# spent 32µs (8+24) within PPIx::Regexp::Tokenizer::BEGIN@9 which was called: # once (8µs+24µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 13
use PPIx::Regexp::Constant qw{
10 MINIMUM_PERL
11 TOKEN_LITERAL
12 TOKEN_UNKNOWN
13119µs256µs};
# spent 32µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@9 # spent 24µs making 1 call to Exporter::import
14291µs1451µs
# spent 451µs (317+134) within PPIx::Regexp::Tokenizer::BEGIN@14 which was called: # once (317µs+134µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 14
use PPIx::Regexp::Token::Assertion ();
# spent 451µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@14
15290µs11.55ms
# spent 1.55ms (595µs+955µs) within PPIx::Regexp::Tokenizer::BEGIN@15 which was called: # once (595µs+955µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 15
use PPIx::Regexp::Token::Backreference ();
# spent 1.55ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@15
162135µs1334µs
# spent 334µs (234+100) within PPIx::Regexp::Tokenizer::BEGIN@16 which was called: # once (234µs+100µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 16
use PPIx::Regexp::Token::Backtrack ();
# spent 334µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@16
172102µs1722µs
# spent 722µs (301+421) within PPIx::Regexp::Tokenizer::BEGIN@17 which was called: # once (301µs+421µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 17
use PPIx::Regexp::Token::CharClass::POSIX ();
# spent 722µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@17
18295µs1398µs
# spent 398µs (237+161) within PPIx::Regexp::Tokenizer::BEGIN@18 which was called: # once (237µs+161µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 18
use PPIx::Regexp::Token::CharClass::POSIX::Unknown ();
# spent 398µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@18
19299µs1728µs
# spent 728µs (598+131) within PPIx::Regexp::Tokenizer::BEGIN@19 which was called: # once (598µs+131µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 19
use PPIx::Regexp::Token::CharClass::Simple ();
# spent 728µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@19
20292µs1589µs
# spent 589µs (440+150) within PPIx::Regexp::Tokenizer::BEGIN@20 which was called: # once (440µs+150µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 20
use PPIx::Regexp::Token::Code ();
# spent 589µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@20
21289µs1297µs
# spent 297µs (201+97) within PPIx::Regexp::Tokenizer::BEGIN@21 which was called: # once (201µs+97µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 21
use PPIx::Regexp::Token::Comment ();
# spent 297µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@21
22285µs1519µs
# spent 519µs (362+157) within PPIx::Regexp::Tokenizer::BEGIN@22 which was called: # once (362µs+157µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 22
use PPIx::Regexp::Token::Condition ();
# spent 519µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@22
232113µs1747µs
# spent 747µs (618+130) within PPIx::Regexp::Tokenizer::BEGIN@23 which was called: # once (618µs+130µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 23
use PPIx::Regexp::Token::Control ();
# spent 747µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@23
242144µs14.54ms
# spent 4.54ms (206µs+4.34) within PPIx::Regexp::Tokenizer::BEGIN@24 which was called: # once (206µs+4.34ms) by PPIx::Regexp::Lexer::BEGIN@61 at line 24
use PPIx::Regexp::Token::Delimiter ();
# spent 4.54ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@24
252123µs1524µs
# spent 524µs (358+166) within PPIx::Regexp::Tokenizer::BEGIN@25 which was called: # once (358µs+166µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 25
use PPIx::Regexp::Token::Greediness ();
# spent 524µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@25
262128µs11.16ms
# spent 1.16ms (285µs+873µs) within PPIx::Regexp::Tokenizer::BEGIN@26 which was called: # once (285µs+873µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 26
use PPIx::Regexp::Token::GroupType::Assertion ();
# spent 1.16ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@26
272120µs1338µs
# spent 338µs (206+133) within PPIx::Regexp::Tokenizer::BEGIN@27 which was called: # once (206µs+133µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 27
use PPIx::Regexp::Token::GroupType::BranchReset ();
# spent 338µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@27
282124µs1543µs
# spent 543µs (377+166) within PPIx::Regexp::Tokenizer::BEGIN@28 which was called: # once (377µs+166µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 28
use PPIx::Regexp::Token::GroupType::Code ();
# spent 543µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@28
292128µs1690µs
# spent 690µs (471+220) within PPIx::Regexp::Tokenizer::BEGIN@29 which was called: # once (471µs+220µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 29
use PPIx::Regexp::Token::GroupType::Modifier ();
# spent 690µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@29
302123µs1925µs
# spent 925µs (600+325) within PPIx::Regexp::Tokenizer::BEGIN@30 which was called: # once (600µs+325µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 30
use PPIx::Regexp::Token::GroupType::NamedCapture ();
# spent 925µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@30
312123µs1354µs
# spent 354µs (217+136) within PPIx::Regexp::Tokenizer::BEGIN@31 which was called: # once (217µs+136µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 31
use PPIx::Regexp::Token::GroupType::Subexpression ();
# spent 354µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@31
322137µs1414µs
# spent 414µs (282+133) within PPIx::Regexp::Tokenizer::BEGIN@32 which was called: # once (282µs+133µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 32
use PPIx::Regexp::Token::GroupType::Switch ();
# spent 414µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@32
332124µs11.39ms
# spent 1.39ms (1.20+185µs) within PPIx::Regexp::Tokenizer::BEGIN@33 which was called: # once (1.20ms+185µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 33
use PPIx::Regexp::Token::Interpolation ();
# spent 1.39ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@33
342109µs11.85ms
# spent 1.85ms (1.69+163µs) within PPIx::Regexp::Tokenizer::BEGIN@34 which was called: # once (1.69ms+163µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 34
use PPIx::Regexp::Token::Literal ();
# spent 1.85ms making 1 call to PPIx::Regexp::Tokenizer::BEGIN@34
35221µs15µs
# spent 5µs within PPIx::Regexp::Tokenizer::BEGIN@35 which was called: # once (5µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 35
use PPIx::Regexp::Token::Modifier ();
# spent 5µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@35
362103µs1504µs
# spent 504µs (327+177) within PPIx::Regexp::Tokenizer::BEGIN@36 which was called: # once (327µs+177µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 36
use PPIx::Regexp::Token::Operator ();
# spent 504µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@36
37297µs1375µs
# spent 375µs (266+109) within PPIx::Regexp::Tokenizer::BEGIN@37 which was called: # once (266µs+109µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 37
use PPIx::Regexp::Token::Quantifier ();
# spent 375µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@37
38216µs14µs
# spent 4µs within PPIx::Regexp::Tokenizer::BEGIN@38 which was called: # once (4µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 38
use PPIx::Regexp::Token::Recursion ();
# spent 4µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@38
39217µs14µs
# spent 4µs within PPIx::Regexp::Tokenizer::BEGIN@39 which was called: # once (4µs+0s) by PPIx::Regexp::Lexer::BEGIN@61 at line 39
use PPIx::Regexp::Token::Structure ();
# spent 4µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@39
40290µs1386µs
# spent 386µs (248+139) within PPIx::Regexp::Tokenizer::BEGIN@40 which was called: # once (248µs+139µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 40
use PPIx::Regexp::Token::Unknown ();
# spent 386µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@40
41289µs1376µs
# spent 376µs (246+130) within PPIx::Regexp::Tokenizer::BEGIN@41 which was called: # once (246µs+130µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 41
use PPIx::Regexp::Token::Whitespace ();
# spent 376µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@41
42221µs249µs
# spent 28µs (8+21) within PPIx::Regexp::Tokenizer::BEGIN@42 which was called: # once (8µs+21µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 42
use PPIx::Regexp::Util qw{ __instance };
# spent 28µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@42 # spent 21µs making 1 call to Exporter::import
4322.40ms247µs
# spent 27µs (7+20) within PPIx::Regexp::Tokenizer::BEGIN@43 which was called: # once (7µs+20µs) by PPIx::Regexp::Lexer::BEGIN@61 at line 43
use Scalar::Util qw{ looks_like_number };
# spent 27µs making 1 call to PPIx::Regexp::Tokenizer::BEGIN@43 # spent 20µs making 1 call to Exporter::import
44
451700nsour $VERSION = '0.036';
46
47{
48 # Names of classes containing tokenization machinery. There are few
49 # known ordering requirements, since each class recognizes its own,
50 # and I have tried to prevent overlap. Absent such constraints, the
51 # order is in perceived frequency of acceptance, to keep the search
52 # as short as possible. If I were conscientious I would gather
53 # statistics on this.
5423µs my @classes = ( # TODO make readonly when acceptable way appears
55 'PPIx::Regexp::Token::Literal',
56 'PPIx::Regexp::Token::Interpolation',
57 'PPIx::Regexp::Token::Control', # Note 1
58 'PPIx::Regexp::Token::CharClass::Simple', # Note 2
59 'PPIx::Regexp::Token::Quantifier',
60 'PPIx::Regexp::Token::Greediness',
61 'PPIx::Regexp::Token::CharClass::POSIX', # Note 3
62 'PPIx::Regexp::Token::Structure',
63 'PPIx::Regexp::Token::Assertion',
64 'PPIx::Regexp::Token::Backreference',
65 'PPIx::Regexp::Token::Operator', # Note 4
66 );
67
68 # Note 1: If we are in quote mode ( \Q ... \E ), Control makes a
69 # literal out of anything it sees other than \E. So it
70 # needs to come before almost all other tokenizers. Not
71 # Literal, which already makes literals, and not
72 # Interpolation, which is legal in quote mode, but
73 # everything else.
74
75 # Note 2: CharClass::Simple must come after Literal, because it
76 # relies on Literal to recognize a Unicode named character
77 # ( \N{something} ), so any \N that comes through to it
78 # must be the \N simple character class (which represents
79 # anything but a newline, and was introduced in Perl
80 # 5.11.0).
81
82 # Note 3: CharClass::POSIX has to come before Structure, since both
83 # look for square brackets, and CharClass::POSIX is the
84 # more particular.
85
86 # Note 4: Operator relies on Literal making the characters literal
87 # if they appear in a context where they can not be
88 # operators, and Control making them literals if quoting,
89 # so it must come after both.
90
# Return the list of tokenizer classes to try in the current mode.
# Classes registered through expect() come first (filtered to those
# that implement the mode's handler), followed by the members of
# @classes that implement it. The latter list is cached per mode in
# $self->{known}.
sub _known_tokenizers {
    my $self = shift;
    my $mode = $self->{mode};

    # Classes queued for "the next parse cycle" become current now.
    if ( $self->{expect_next} ) {
        $self->{expect}      = $self->{expect_next};
        $self->{expect_next} = undef;
    }

    my @expect = $self->{expect}
        ? $self->_known_tokenizer_check( @{ $self->{expect} } )
        : ();

    # Compute and cache the per-mode list the first time it is needed.
    unless ( exists $self->{known}{$mode} ) {
        $self->{known}{$mode} =
            [ $self->_known_tokenizer_check( @classes ) ];
    }

    return ( @expect, @{ $self->{known}{$mode} } );
}
114
# Given a list of class names, return those that implement the
# tokenizer handler for the current mode, i.e. a method named
# '__PPIX_TOKENIZER__' followed by $self->{mode}. Order is preserved.
sub _known_tokenizer_check {
    my ( $self, @candidates ) = @_;

    my $handler = '__PPIX_TOKENIZER__' . $self->{mode};

    return grep { $_->can( $handler ) } @candidates;
}
132
133}
134
135{
1362100ns my $errstr;
137
# Constructor. $re is either a PPI::Element (its content() is
# tokenized) or a plain string; any other reference is rejected.
# Recognized %args are default_modifiers (must be an array reference),
# encoding and trace. On failure nothing is returned and the reason is
# recorded where errstr() can retrieve it.
sub new {
    my ( $class, $re, %args ) = @_;
    $class = ref( $class ) || $class;

    $errstr = undef;

    if ( exists $args{default_modifiers}
        && 'ARRAY' ne ref $args{default_modifiers} ) {
        $errstr = 'default_modifiers must be an array reference';
        return;
    }

    my $self = {
        # Captures from find_regexp.
        capture           => undef,
        # The string we are tokenizing.
        content           => undef,
        # Cookies.
        cookie            => {},
        # The current position in the string.
        cursor_curr       => 0,
        # The end of the portion of the string being tokenized.
        cursor_limit      => undef,
        # Position of cursor when tokenizer called. Used by get_token
        # to prevent recursion.
        cursor_orig       => undef,
        # Position of modifiers.
        cursor_modifiers  => undef,
        default_modifiers => $args{default_modifiers} || [],
        # Finishing delimiter of regexp.
        delimiter_finish  => undef,
        # Recognize finishing delimiter.
        delimiter_re      => undef,
        # Starting delimiter of regexp.
        delimiter_start   => undef,
        # Character encoding.
        encoding          => $args{encoding},
        # Extra classes to expect.
        expect            => undef,
        # Extra classes as of next parse cycle.
        expect_next       => undef,
        # Number of parse failures.
        failures          => 0,
        # String for find_regexp.
        find              => undef,
        # Known tokenizers, by mode.
        known             => {},
        # Match from find_regexp.
        match             => undef,
        # Initialize.
        mode              => 'init',
        # Modifier hash.
        modifiers         => [{}],
        # Tokens made but not returned.
        pending           => [],
        # Prior significant token.
        prior             => TOKEN_UNKNOWN,
        # The object we were initialized with.
        source            => $re,
        trace             => __PACKAGE__->_defined_or(
            $args{trace}, $ENV{PPIX_REGEXP_TOKENIZER_TRACE}, 0 ),
    };

    if ( __instance( $re, 'PPI::Element' ) ) {
        $self->{content} = $re->content();
    } elsif ( ref $re ) {
        $errstr = ref( $re ) . ' not supported';
        return;
    } else {
        $self->{content} = $re;
    }

    bless $self, $class;

    $self->{content} = $self->decode( $self->{content} );

    # Trailing white space is excluded from the region to tokenize.
    $self->{cursor_limit} =
        $self->{content} =~ m/ \s+ \z /smx
        ? $-[0]
        : length $self->{content};

    if ( $self->{trace} ) {
        warn "\ntokenizing '$self->{content}'\n";
    }

    return $self;
}
206
# Return the error message recorded by the most recent call to new(),
# or undef if that call did not record one.
sub errstr {
    return $errstr;
}
210
211}
212
# Return the captures stored by the last successful find_regexp(): the
# list of captures in list context, the array reference itself in
# scalar context. Returns nothing in void context or when no captures
# are stored.
sub capture {
    my $self = shift;
    my $captures = $self->{capture};
    return unless $captures;
    return unless defined wantarray;
    return wantarray ? @{ $captures } : $captures;
}
219
# Return the string being tokenized.
sub content {
    my $self = shift;
    return $self->{content};
}
224
# Get, set or delete a named cookie.
#   cookie( $name )        returns the cookie stored under $name.
#   cookie( $name, $code ) stores the code reference and returns it.
#   cookie( $name, undef ) deletes the cookie and returns the old value.
# Dies (confess) if $name is undefined, or if the value supplied is
# defined but is not a code reference.
sub cookie {
    my ( $self, $name, @args ) = @_;

    confess "Programming error - undefined cookie name"
        unless defined $name;

    return $self->{cookie}{$name}
        unless @args;

    my ( $value ) = @args;

    return ( $self->{cookie}{$name} = $value )
        if ref $value eq 'CODE';

    confess "Programming error - cookie must be CODE ref or undef"
        if defined $value;

    return delete $self->{cookie}{$name};
}
239
# Return a reference to a fresh copy of the default modifiers, so the
# caller can not alter the tokenizer's own list.
sub default_modifiers {
    my $self = shift;
    my @copy = @{ $self->{default_modifiers} };
    return \@copy;
}
244
# Return a reference to a shallow copy of the effective modifiers
# hash, or to an empty hash if $self->{effective_modifiers} is not a
# hash reference.
sub __effective_modifiers {
    my $self = shift;
    my $mods = $self->{effective_modifiers};
    return 'HASH' eq ref $mods ? +{ %{ $mods } } : +{};
}
251
# Return the encoding that was passed to new(), if any.
sub encoding {
    my $self = shift;
    return $self->{encoding};
}
256
# Register extra tokenizer classes to be tried first as of the next
# parse cycle. Names lacking the 'PPIx::Regexp::' prefix have it
# prepended. Any current expectation is cleared. Calling with no class
# names does nothing. Always returns nothing.
sub expect {
    my ( $self, @args ) = @_;

    return unless @args;

    my @qualified;
    foreach my $name ( @args ) {
        if ( $name =~ m/ \A PPIx::Regexp:: /smx ) {
            push @qualified, $name;
        } else {
            push @qualified, 'PPIx::Regexp::' . $name;
        }
    }

    $self->{expect_next} = \@qualified;
    $self->{expect} = undef;
    return;
}
270
# Return the number of parse failures recorded so far.
sub failures {
    my $self = shift;
    return $self->{failures};
}
275
# Starting from the delimiter character at the cursor, find the
# matching closing delimiter and return its offset relative to the
# cursor. For a bracketing delimiter (as reported by close_bracket())
# nested pairs are skipped. A back slash escapes the following
# character unless the closing delimiter is itself a back slash.
# Returns nothing if no match is found before cursor_limit.
sub find_matching_delimiter {
    my ( $self ) = @_;
    $self->{cursor_curr} ||= 0;

    my $origin = $self->{cursor_curr};
    my $open = substr $self->{content}, $origin, 1;

    my $bracketed = $self->close_bracket( $open );
    my $close = $bracketed || $open;
    my $depth = 0;

    for ( my $pos = $origin + 1; $pos < $self->{cursor_limit}; $pos++ ) {
        my $char = substr $self->{content}, $pos, 1;

        if ( $char eq '\\' && $close ne '\\' ) {
            # Step over the escaped character as well.
            $pos++;
            next;
        }

        if ( $bracketed && $char eq $open ) {
            $depth++;
            next;
        }

        $char eq $close
            or next;

        # Closing an inner pair just unwinds one nesting level.
        return $pos - $self->{cursor_curr}
            if --$depth < 0;
    }

    return;
}
303
# Match $regexp (which must be a Regexp reference) against the
# not-yet-tokenized remainder of the content.
#
# On success the whole match is stored in $self->{match} and the
# captures (undef for groups that did not participate) in
# $self->{capture}. In list context the start and end offsets of the
# match within the remainder are returned; in scalar context only the
# end offset. On failure nothing is returned and the stored match and
# captures are left untouched.
sub find_regexp {
    my ( $self, $regexp ) = @_;

    ref $regexp eq 'Regexp'
        or confess
        'Argument is a ', ( ref $regexp || 'scalar' ), ' not a Regexp';

    # The remainder is computed lazily and reused until the next
    # make_token() invalidates it.
    defined $self->{find} or $self->_remainder();

    $self->{find} =~ $regexp
        or return;

    # Element 0 is the whole match; 1 .. $#+ are the capture groups.
    # The captures are gathered in a local array only, so that
    # $self->{capture} never holds anything but an array reference.
    my @capture;
    foreach my $inx ( 0 .. $#+ ) {
        if ( defined $-[$inx] && defined $+[$inx] ) {
            push @capture, substr
                $self->{find},
                $-[$inx],
                $+[$inx] - $-[$inx];
        } else {
            push @capture, undef;
        }
    }
    $self->{match} = shift @capture;
    $self->{capture} = \@capture;

    # The following circumlocution seems to be needed under Perl 5.13.0
    # for reasons I do not fathom -- at least in the case where
    # wantarray is false. RT 56864 details the symptoms, which I was
    # never able to reproduce outside Perl::Critic. But returning $+[0]
    # directly, the value could transmogrify between here and the
    # calling module.
    return wantarray ? ( $-[0] + 0, $+[0] + 0 ) : $+[0] + 0;
}
340
# Return the starting delimiter of the regular expression, as recorded
# in $self->{delimiter_start}.
sub get_start_delimiter {
    my $self = shift;
    return $self->{delimiter_start};
}
345
# Dispatch to this package's handler for the current mode, passing the
# tokenizer and the character at the cursor, and return whatever the
# handler returns. A caller outside this package must have advanced
# the cursor past cursor_orig first (the guard against a tokenizer
# class calling back in without having made a token).
sub get_token {
    my ( $self ) = @_;

    my $calling_package = caller;
    unless ( $calling_package eq __PACKAGE__
        || $self->{cursor_curr} > $self->{cursor_orig} ) {
        confess 'Programming error - get_token() called without ',
            'first calling make_token()';
    }

    my $handler = '__PPIX_TOKENIZER__' . $self->{mode};
    my $character = substr $self->{content}, $self->{cursor_curr}, 1;

    return ( __PACKAGE__->$handler( $self, $character ) );
}
363
# Return true unless the starting delimiter is a single quote.
sub interpolates {
    my $self = shift;
    my $opener = $self->{delimiter_start};
    return $opener ne q{'};
}
368
# Make a token of the given class from the next $length characters at
# the cursor, advance the cursor past them, and return the token.
#
# $class defaults to the calling package, and gets 'PPIx::Regexp::'
# prepended if it lacks that prefix. $arg is handed to the token's
# __PPIX_TOKEN__post_make() hook. A length that would run past
# cursor_limit is truncated; if nothing is left, or if the class
# declines to construct a token, nothing is returned.
sub make_token {
    my ( $self, $length, $class, $arg ) = @_;
    $class = caller
        unless defined $class;

    if ( $length + $self->{cursor_curr} > $self->{cursor_limit} ) {
        $length = $self->{cursor_limit} - $self->{cursor_curr};
        $length
            or return;
    }

    unless ( $class =~ m/ \A PPIx::Regexp:: /smx ) {
        $class = 'PPIx::Regexp::' . $class;
    }

    my $content = substr $self->{content}, $self->{cursor_curr}, $length;

    if ( $self->{trace} ) {
        warn "make_token( $length, '$class' ) => '$content'\n";
    }
    if ( $self->{trace} > 1 ) {
        warn " make_token: cursor_curr = $self->{cursor_curr}; ",
            "cursor_limit = $self->{cursor_limit}\n";
    }

    my $token = $class->_new( $content )
        or return;

    # A significant token uses up any pending expectation.
    $self->{expect} = undef
        if $token->significant();
    $token->__PPIX_TOKEN__post_make( $self, $arg );

    $self->{failures}++
        if $token->isa( TOKEN_UNKNOWN );

    # Advance the cursor and invalidate the find_regexp() state.
    $self->{cursor_curr} += $length;
    @{ $self }{ qw{ find match capture } } = ( undef, undef, undef );

    # Give every cookie a look at the new token. A cookie that returns
    # false is discarded.
    foreach my $name ( keys %{ $self->{cookie} } ) {
        next if $self->{cookie}{$name}->( $self, $token );
        delete $self->{cookie}{$name};
    }

    # Record this token as the prior token if it is significant. We must
    # do this after processing cookies, so that the cookies have access
    # to the old token if they want.
    $self->{prior} = $token
        if $token->significant();

    return $token;
}
415
# Return the text matched by the last successful find_regexp(), or
# undef if there is none stored.
sub match {
    my $self = shift;
    return $self->{match};
}
420
# Return the value of the named modifier in the innermost (last)
# frame of the modifier stack.
sub modifier {
    my ( $self, $name ) = @_;
    return $self->{modifiers}->[-1]->{$name};
}
425
# Push a shallow copy of the innermost modifier frame onto the
# modifier stack. Returns nothing.
sub modifier_duplicate {
    my ( $self ) = @_;
    my %frame = %{ $self->{modifiers}[-1] };
    push @{ $self->{modifiers} }, \%frame;
    return;
}
432
# Replace the innermost modifier frame with the result of applying
# %args to it. Returns nothing.
sub modifier_modify {
    my ( $self, %args ) = @_;

    my $stack = $self->{modifiers};

    # Modifier code is centralized in PPIx::Regexp::Token::Modifier
    $stack->[-1] =
        PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify(
            $stack->[-1], \%args );

    return;
}
444
# Discard the innermost modifier frame, but never the last remaining
# one. Returns nothing.
sub modifier_pop {
    my ( $self ) = @_;
    my $stack = $self->{modifiers};
    if ( @{ $stack } > 1 ) {
        pop @{ $stack };
    }
    return;
}
451
# Return the next token, or nothing when there are no more.
#
# Tokens already made but not yet returned are handed out first. When
# the cursor reaches cursor_limit short of the end of the content, the
# tokenizer switches to 'finish' mode and extends the limit by one.
sub next_token {
    my ( $self ) = @_;

    while ( 1 ) {

        return shift @{ $self->{pending} }
            if @{ $self->{pending} };

        if ( $self->{cursor_curr} >= $self->{cursor_limit} ) {
            return
                if $self->{cursor_limit} >= length $self->{content};
            return
                if $self->{mode} eq 'finish';
            $self->{mode} = 'finish';
            $self->{cursor_limit}++;
        }

        # No tokens from the handler means we are done.
        my @tokens = $self->get_token()
            or last;
        push @{ $self->{pending} }, @tokens;
    }

    return;

}
480
# Return the character $offset positions past the cursor (default 0)
# without consuming it. Returns nothing for a negative offset or for a
# position at or beyond cursor_limit.
sub peek {
    my ( $self, $offset ) = @_;
    $offset = 0
        unless defined $offset;
    return
        if $offset < 0;

    my $position = $offset + $self->{cursor_curr};
    return
        if $position >= $self->{cursor_limit};

    return substr $self->{content}, $position, 1;
}
489
# Return the result of PPI::Document->new() applied to a copy of the
# not-yet-tokenized remainder of the content.
sub ppi_document {
    my ( $self ) = @_;

    # This file does not otherwise load PPI::Document, so load it here
    # rather than rely on some other module having done so. require is
    # a no-op once the module is loaded.
    require PPI::Document;

    defined $self->{find} or $self->_remainder();

    # The string is interpolated so that PPI gets a reference to a
    # copy, not to $self->{find} itself.
    return PPI::Document->new( \"$self->{find}" );
}
497
# With no method name, return the prior significant token. Otherwise
# call the named method on it with @args and return the result. Dies
# (confess) if the prior token does not support the method.
sub prior {
    my ( $self, $method, @args ) = @_;

    my $previous = $self->{prior};

    return $previous
        unless defined $method;

    unless ( $previous->can( $method ) ) {
        confess 'Programming error - ',
            ( ref $previous || $previous ),
            ' does not support method ', $method;
    }

    return $previous->$method( @args );
}
507
# Always returns 1.
sub significant {
    return 1;
}
511
# Return all remaining tokens, obtained by calling next_token() until
# it yields a false value.
sub tokens {
    my ( $self ) = @_;

    my @collected;
    for ( ;; ) {
        my $token = $self->next_token()
            or last;
        push @collected, $token;
    }

    return @collected;
}
522
# Store in $self->{find} the part of the content between the cursor
# and cursor_limit, for find_regexp() to match against. Dies (confess)
# if the cursor is already past the limit. Returns nothing.
sub _remainder {
    my ( $self ) = @_;

    my ( $from, $to ) = @{ $self }{ qw{ cursor_curr cursor_limit } };

    if ( $from > $to ) {
        confess "Programming error - Trying to find past end of string";
    }

    $self->{find} = substr $self->{content}, $from, $to - $from;

    return;
}
536
# Handler for 'init' mode. Parses the start of the expression: leading
# white space, the optional operator (qr, m or s), white space after
# it, and the opening delimiter. It also locates the trailing
# modifiers and the closing delimiter so that cursor_limit bounds the
# first delimited section. Returns the tokens made and leaves the
# tokenizer in 'regexp' mode. If the content does not start like a
# regular expression, the mode is left as 'kaput' and the result of
# making a single unknown token from the whole content is returned.
sub __PPIX_TOKENIZER__init {
    my ( $class, $tokenizer, $character ) = @_;

    # Assume the worst until the opening has been parsed.
    $tokenizer->{mode} = 'kaput';
    $tokenizer->{content} =~ m/ \A \s* ( qr | m | s )? ( \s* ) ( [^\w\s] ) /smx
        or return $tokenizer->make_token(
            length( $tokenizer->{content} ), TOKEN_UNKNOWN, {
                error => 'Tokenizer found illegal first characters',
            },
        );
    my ( $type, $white ) = ( $1, $2 );

    # The leading white space ends where the first capture group that
    # took part in the match begins. This must be worked out before
    # any other match disturbs @-.
    my $start_pos = 0;
    foreach my $group ( 1 .. 3 ) {
        defined $-[$group]
            or next;
        $start_pos = $-[$group];
        last;
    }

    $type = ''
        unless defined $type;
    $tokenizer->{type} = $type;

    my @tokens;
    if ( $start_pos ) {
        push @tokens, $tokenizer->make_token( $start_pos,
            'PPIx::Regexp::Token::Whitespace' );
    }
    push @tokens, $tokenizer->make_token( length( $type ),
        'PPIx::Regexp::Token::Structure' );
    if ( length( $white ) > 0 ) {
        push @tokens, $tokenizer->make_token( length( $white ),
            'PPIx::Regexp::Token::Whitespace' );
    }

    {
        # Work out the effective modifiers: the defaults, followed by
        # the run of lower-case letters at the end of the content.
        my @mods = @{ $tokenizer->{default_modifiers} };
        my ( $trailing ) =
            $tokenizer->{content} =~ m/ ( [[:lower:]]* ) \s* \z /smx;
        if ( defined $trailing ) {
            $tokenizer->{cursor_limit} -= length $trailing;
            push @mods, $trailing;
        }
        my $effective =
            PPIx::Regexp::Token::Modifier::__aggregate_modifiers (
                @mods );
        $tokenizer->{effective_modifiers} = $effective;
        $tokenizer->{modifiers} = [
            { %{ $tokenizer->{effective_modifiers} } },
        ];
        $tokenizer->{cursor_modifiers} = $tokenizer->{cursor_limit};
    }

    $tokenizer->{delimiter_start} =
        substr $tokenizer->{content}, $tokenizer->{cursor_curr}, 1;

    # For s/// the first section ends at the delimiter matching the
    # opening one; otherwise (or if no match is found) it ends just
    # before the modifiers.
    my $offset = $type eq 's'
        ? $tokenizer->find_matching_delimiter()
        : undef;
    $tokenizer->{cursor_limit} = $offset
        ? $tokenizer->{cursor_curr} + $offset
        : $tokenizer->{cursor_modifiers} - 1;

    $tokenizer->{delimiter_finish} =
        substr $tokenizer->{content}, $tokenizer->{cursor_limit}, 1;
    $tokenizer->{delimiter_re} = undef;

    push @tokens, $tokenizer->make_token( 1,
        'PPIx::Regexp::Token::Delimiter' );

    $tokenizer->{mode} = 'regexp';

    return @tokens;
}
606
# Handler for 'regexp' mode (and, through an alias, 'repl' mode).
# Offers the character at the cursor to each known tokenizer class in
# turn and returns the tokens from the first class that produces any.
# A class may return token objects, or lengths from which tokens of
# that class are made. If no class accepts the character, the
# fallback for the current mode is called (the 'regexp' fallback if
# the mode has none of its own).
sub __PPIX_TOKENIZER__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    my $mode = $tokenizer->{mode};
    my $handler = '__PPIX_TOKENIZER__' . $mode;

    # Remember where we started, so get_token() can detect a
    # tokenizer class calling back without having made a token.
    $tokenizer->{cursor_orig} = $tokenizer->{cursor_curr};

    foreach my $candidate ( $tokenizer->_known_tokenizers() ) {
        my @tokens = grep { $_ }
            $candidate->$handler( $tokenizer, $character );
        if ( $tokenizer->{trace} ) {
            warn $candidate, "->$handler( \$tokenizer, '$character' )",
                " => (@tokens)\n";
        }
        @tokens
            or next;
        return map {
            ref $_ ? $_ : $tokenizer->make_token( $_, $candidate )
        } @tokens;
    }

    # Find a fallback processor for the character.
    my $fallback = __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__' . $mode );
    $fallback ||= __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__regexp' );
    $fallback
        or confess "Programming error - unable to find fallback for $mode";

    return $fallback->( $class, $tokenizer, $character );
}
631
# Replacement mode reuses the regexp-mode dispatcher: that routine derives
# the handler and fallback names from $tokenizer->{mode}, so aliasing the
# symbol lets one body serve both modes.
63211µs*__PPIX_TOKENIZER__repl = \&__PPIX_TOKENIZER__regexp;
633
# Fallback used in regexp mode when no tokenizer class claims a character.
#
# Arguments (called as a static method):
#   $class     - the invocant (unused)
#   $tokenizer - the tokenizer object
#   $character - the unclaimed character
#
# Returns a two-character literal token for a backslash that has more
# than one character left before the cursor limit; anything else becomes
# a one-character unknown token carrying an error message.
sub __PPIX_TOKEN_FALLBACK__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    # An escape needs the backslash plus a following character that is
    # still inside the cursor limit.
    my $is_escape = $character eq '\\'
        && $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr} > 1;

    # As a fallback in regexp mode, any escaped character is a literal.
    $is_escape
        and return $tokenizer->make_token( 2, TOKEN_LITERAL );

    # Everything else is unknown.
    my %attr = ( error => 'Tokenizer found unexpected literal' );
    return $tokenizer->make_token( 1, TOKEN_UNKNOWN, \%attr );
}
650
# Fallback used in replacement mode when no tokenizer class claims a
# character. Everything becomes a literal; the only question is whether
# a backslash takes the following character with it.
#
# Arguments (called as a static method):
#   $class     - the invocant (unused)
#   $tokenizer - the tokenizer object
#   $character - the unclaimed character
#
# Returns a literal token of length 2 for a backslash followed by
# another character when either the tokenizer interpolates or that
# character is a single quote or a backslash; otherwise a literal token
# of length 1.
sub __PPIX_TOKEN_FALLBACK__repl {
    my ( $class, $tokenizer, $character ) = @_;

    # A normal character is a literal of length 1.
    my $length = 1;

    if ( $character eq '\\' ) {
        my $next = $tokenizer->peek( 1 );

        # The escape consumes the next character only when there is one
        # and the backslash actually quotes it.
        if ( defined $next
            && ( $tokenizer->interpolates()
                || $next eq q<'>
                || $next eq '\\' ) ) {
            $length = 2;
        }
    }

    return $tokenizer->make_token( $length, TOKEN_LITERAL );
}
667
# Handle a closing delimiter: either the end of the whole expression or
# the boundary between the regular expression and its replacement.
#
# Arguments (called as a static method):
#   $class     - the invocant (unused)
#   $tokenizer - the tokenizer object; its cursor limit, delimiter
#                fields, cookies and mode are updated here
#   $character - unused
#
# Returns the tokens generated, always starting with a Delimiter token:
#   * When the cursor has reached the modifiers, a Modifier token follows,
#     plus a Whitespace token if the content ends in white space, and the
#     mode becomes 'kaput'.
#   * Otherwise a replacement follows. For a bracketed expression any
#     white space and the replacement's opening delimiter are tokenized
#     first. With the 'e' modifier the rest is emitted as Code, Delimiter
#     and Modifier tokens and the mode becomes 'kaput'; without it the
#     mode becomes 'repl'.
#
# Dies via confess() if the cursor limit is beyond the end of the content.
sub __PPIX_TOKENIZER__finish {
    my ( $class, $tokenizer, $character ) = @_;

    $tokenizer->{cursor_limit} > length $tokenizer->{content}
        and confess "Programming error - ran off string";
    my @tokens = $tokenizer->make_token( 1,
        'PPIx::Regexp::Token::Delimiter' );

    # The cursors are numeric offsets into the content, so they are
    # compared numerically rather than as strings.
    if ( $tokenizer->{cursor_curr} == $tokenizer->{cursor_modifiers} ) {

        # We are out of string. Make the modifier token and close up
        # shop.
        my $trailer;
        if ( $tokenizer->{content} =~ m/ \s+ \z /smx ) {
            # Limit the modifiers to what precedes the trailing white
            # space ($-[0] is where that white space starts).
            $tokenizer->{cursor_limit} = $-[0];
            # NOTE(review): this is measured from the cursor, not from
            # $-[0], so it also spans the modifiers; presumably
            # make_token() clips the length to the cursor limit - confirm.
            $trailer = length( $tokenizer->{content} ) -
                $tokenizer->{cursor_curr};
        } else {
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
        }
        push @tokens, $tokenizer->make_token(
            $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
            'PPIx::Regexp::Token::Modifier' );
        if ( $trailer ) {
            # Re-open the limit so the trailing white space can be taken.
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
            push @tokens, $tokenizer->make_token(
                $trailer, 'PPIx::Regexp::Token::Whitespace' );
        }
        $tokenizer->{mode} = 'kaput';

    } else {

        # Clear the cookies, because we are going around again.
        $tokenizer->{cookie} = {};

        # Move the cursor limit to just before the modifiers.
        $tokenizer->{cursor_limit} = $tokenizer->{cursor_modifiers} - 1;

        # If the preceding regular expression was bracketed, we need to
        # consume possible whitespace and find another delimiter.

        if ( $tokenizer->close_bracket( $tokenizer->{delimiter_start} ) ) {
            my $accept;
            $accept = $tokenizer->find_regexp( qr{ \A \s+ }smx )
                and push @tokens, $tokenizer->make_token(
                    $accept, 'PPIx::Regexp::Token::Whitespace' );
            my $opener = $tokenizer->peek();
            $tokenizer->{delimiter_start} = $opener;
            push @tokens, $tokenizer->make_token(
                1, 'PPIx::Regexp::Token::Delimiter' );
            # The cursor limit is one before the modifiers, i.e. on the
            # final delimiter itself, so the closing delimiter is the
            # character at that offset (not the one preceding it, which
            # is the last character of the replacement).
            $tokenizer->{delimiter_finish} = substr
                $tokenizer->{content},
                $tokenizer->{cursor_limit},
                1;
            $tokenizer->{delimiter_re} = undef;
        }

        if ( $tokenizer->modifier( 'e' ) ) {
            # With /e, the replacement portion is code. We make it all
            # into one big PPIx::Regexp::Token::Code, slap on the
            # trailing delimiter and modifiers, and return it all.
            push @tokens, $tokenizer->make_token(
                $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
                'PPIx::Regexp::Token::Code',
                { perl_version_introduced => MINIMUM_PERL },
            );
            $tokenizer->{cursor_limit} = length $tokenizer->{content};
            push @tokens, $tokenizer->make_token( 1,
                'PPIx::Regexp::Token::Delimiter' );
            push @tokens, $tokenizer->make_token(
                $tokenizer->{cursor_limit} - $tokenizer->{cursor_curr},
                'PPIx::Regexp::Token::Modifier' );
            $tokenizer->{mode} = 'kaput';
        } else {
            # Put our mode to replacement.
            $tokenizer->{mode} = 'repl';
        }

    }

    return @tokens;

}
751
75215µs1;
753
754__END__