Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPI/Token/Whitespace.pm |
Statements | Executed 761603 statements in 1.43s |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
140043 | 17 | 13 | 482ms | 2.23s | __TOKENIZER__on_char (recurses: max depth 1, inclusive time 164ms) | PPI::Token::Whitespace::
15087 | 2 | 2 | 179ms | 272ms | __TOKENIZER__on_line_start | PPI::Token::Whitespace::
115422 | 17 | 6 | 119ms | 119ms | significant | PPI::Token::Whitespace::
9549 | 1 | 1 | 67.0ms | 94.4ms | __TOKENIZER__on_line_end | PPI::Token::Whitespace::
45841 | 3 | 1 | 28.0ms | 28.0ms | CORE:match (opcode) | PPI::Token::Whitespace::
144 | 1 | 1 | 1.60ms | 3.03ms | null | PPI::Token::Whitespace::
1 | 1 | 1 | 59µs | 59µs | BEGIN@114 | PPI::Token::Whitespace::
1 | 1 | 1 | 12µs | 24µs | BEGIN@44 | PPI::Token::Whitespace::
1 | 1 | 1 | 8µs | 8µs | BEGIN@49 | PPI::Token::Whitespace::
1 | 1 | 1 | 8µs | 47µs | BEGIN@113 | PPI::Token::Whitespace::
1 | 1 | 1 | 7µs | 42µs | BEGIN@48 | PPI::Token::Whitespace::
1 | 1 | 1 | 3µs | 3µs | BEGIN@45 | PPI::Token::Whitespace::
1 | 1 | 1 | 3µs | 3µs | BEGIN@46 | PPI::Token::Whitespace::
0 | 0 | 0 | 0s | 0s | tidy | PPI::Token::Whitespace::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package PPI::Token::Whitespace; | ||||
2 | |||||
3 | =pod | ||||
4 | |||||
5 | =head1 NAME | ||||
6 | |||||
7 | PPI::Token::Whitespace - Tokens representing ordinary white space | ||||
8 | |||||
9 | =head1 INHERITANCE | ||||
10 | |||||
11 | PPI::Token::Whitespace | ||||
12 | isa PPI::Token | ||||
13 | isa PPI::Element | ||||
14 | |||||
15 | =head1 DESCRIPTION | ||||
16 | |||||
17 | As a full "round-trip" parser, PPI records every last byte in a | ||||
18 | file and ensures that it is included in the L<PPI::Document> object. | ||||
19 | |||||
20 | This even includes whitespace. In fact, Perl documents are seen | ||||
21 | as "floating in a sea of whitespace", and thus any document will | ||||
22 | contain vast quantities of C<PPI::Token::Whitespace> objects. | ||||
23 | |||||
24 | For the most part, you shouldn't notice them. Or at least, you | ||||
25 | shouldn't B<have> to notice them. | ||||
26 | |||||
27 | This means doing things like consistently using the "S for significant" | ||||
28 | series of L<PPI::Node> and L<PPI::Element> methods to do things. | ||||
29 | |||||
30 | If you want the nth child element, you should be using C<schild> rather | ||||
31 | than C<child>, and likewise C<snext_sibling>, C<sprevious_sibling>, and | ||||
32 | so on and so forth. | ||||
33 | |||||
34 | =head1 METHODS | ||||
35 | |||||
36 | Again, for the most part you should really B<not> need to do anything | ||||
37 | very significant with whitespace. | ||||
38 | |||||
39 | But there are a couple of convenience methods provided, beyond those | ||||
40 | provided by the parent L<PPI::Token> and L<PPI::Element> classes. | ||||
41 | |||||
42 | =cut | ||||
43 | |||||
44 | 2 | 19µs | 2 | 36µs | # spent 24µs (12+12) within PPI::Token::Whitespace::BEGIN@44 which was called:
# once (12µs+12µs) by PPI::Token::BEGIN@39 at line 44 # spent 24µs making 1 call to PPI::Token::Whitespace::BEGIN@44
# spent 12µs making 1 call to strict::import |
45 | 2 | 16µs | 1 | 3µs | # spent 3µs within PPI::Token::Whitespace::BEGIN@45 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@39 at line 45 # spent 3µs making 1 call to PPI::Token::Whitespace::BEGIN@45 |
46 | 2 | 18µs | 1 | 3µs | # spent 3µs within PPI::Token::Whitespace::BEGIN@46 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@39 at line 46 # spent 3µs making 1 call to PPI::Token::Whitespace::BEGIN@46 |
47 | |||||
48 | 2 | 29µs | 2 | 77µs | # spent 42µs (7+35) within PPI::Token::Whitespace::BEGIN@48 which was called:
# once (7µs+35µs) by PPI::Token::BEGIN@39 at line 48 # spent 42µs making 1 call to PPI::Token::Whitespace::BEGIN@48
# spent 35µs making 1 call to vars::import |
49 | # spent 8µs within PPI::Token::Whitespace::BEGIN@49 which was called:
# once (8µs+0s) by PPI::Token::BEGIN@39 at line 52 | ||||
50 | 1 | 400ns | $VERSION = '1.215'; | ||
51 | 1 | 11µs | @ISA = 'PPI::Token'; | ||
52 | 1 | 104µs | 1 | 8µs | } # spent 8µs making 1 call to PPI::Token::Whitespace::BEGIN@49 |
53 | |||||
54 | =pod | ||||
55 | |||||
56 | =head2 null | ||||
57 | |||||
58 | Because L<PPI> sees documents as sitting on a sort of substrate made of | ||||
59 | whitespace, there are a couple of corner cases that get particularly | ||||
60 | nasty if they don't find whitespace in certain places. | ||||
61 | |||||
62 | Imagine walking down the beach to go into the ocean, and then quite | ||||
63 | unexpectedly falling off the side of the planet. Well it's somewhat | ||||
64 | equivalent to that, including the whole screaming death bit. | ||||
65 | |||||
66 | The C<null> method is a convenience provided to get some internals | ||||
67 | out of some of these corner cases. | ||||
68 | |||||
69 | Specifically it creates a whitespace token that represents nothing, | ||||
70 | or at least the null string C<''>. It's a handy way to have some | ||||
71 | "whitespace" right where you need it, without having to have any | ||||
72 | actual characters. | ||||
73 | |||||
74 | =cut | ||||
75 | |||||
76 | 1 | 300ns | my $null = undef; | ||
77 | |||||
78 | # spent 3.03ms (1.60+1.44) within PPI::Token::Whitespace::null which was called 144 times, avg 21µs/call:
# 144 times (1.60ms+1.44ms) by PPI::Tokenizer::_previous_significant_tokens at line 707 of PPI/Tokenizer.pm, avg 21µs/call | ||||
79 | 144 | 1.02ms | 144 | 224µs | $null ||= $_[0]->new(''); # spent 216µs making 143 calls to PPI::Util::TRUE, avg 2µs/call
# spent 8µs making 1 call to PPI::Token::new |
80 | 144 | 2.17ms | 144 | 1.21ms | Clone::clone($null); # spent 1.21ms making 144 calls to Clone::clone, avg 8µs/call |
81 | } | ||||
82 | |||||
83 | ### XS -> PPI/XS.xs:_PPI_Token_Whitespace__significant 0.900+ | ||||
84 | 115422 | 368ms | # spent 119ms within PPI::Token::Whitespace::significant which was called 115422 times, avg 1µs/call:
# 27638 times (29.5ms+0s) by PPI::Lexer::_lex_statement at line 608 of PPI/Lexer.pm, avg 1µs/call
# 25219 times (26.7ms+0s) by PPI::Element::snext_sibling at line 370 of PPI/Element.pm, avg 1µs/call
# 14524 times (15.5ms+0s) by PPI::Lexer::_lex_structure at line 1321 of PPI/Lexer.pm, avg 1µs/call
# 13592 times (13.8ms+0s) by PPI::Tokenizer::_previous_significant_tokens at line 699 of PPI/Tokenizer.pm, avg 1µs/call
# 9921 times (10.0ms+0s) by PPI::Node::schild at line 282 of PPI/Node.pm, avg 1µs/call
# 5893 times (5.53ms+0s) by PPI::Node::schildren at line 232 of PPI/Node.pm, avg 939ns/call
# 5891 times (5.49ms+0s) by PPI::Node::schildren at line 229 of PPI/Node.pm, avg 933ns/call
# 5248 times (5.48ms+0s) by PPI::Lexer::_lex_document at line 272 of PPI/Lexer.pm, avg 1µs/call
# 4232 times (3.87ms+0s) by PPI::Statement::Variable::type at line 67 of PPI/Statement/Variable.pm, avg 915ns/call
# 1022 times (969µs+0s) by PPI::Lexer::_statement at line 501 of PPI/Lexer.pm, avg 948ns/call
# 981 times (1.10ms+0s) by PPI::Lexer::_statement at line 543 of PPI/Lexer.pm, avg 1µs/call
# 385 times (386µs+0s) by PPI::Node::schild at line 277 of PPI/Node.pm, avg 1µs/call
# 270 times (309µs+0s) by PPI::Element::sprevious_sibling at line 418 of PPI/Element.pm, avg 1µs/call
# 205 times (209µs+0s) by Perl::Critic::Policy::ControlStructures::ProhibitMutatingListFunctions::_has_topic_side_effect at line 121 of Perl/Critic/Policy/ControlStructures/ProhibitMutatingListFunctions.pm, avg 1µs/call
# 190 times (202µs+0s) by PPI::Lexer::_curly at line 1264 of PPI/Lexer.pm, avg 1µs/call
# 159 times (161µs+0s) by PPI::Lexer::_statement at line 430 of PPI/Lexer.pm, avg 1µs/call
# 52 times (54µs+0s) by PPI::Tokenizer::_last_significant_token at line 680 of PPI/Tokenizer.pm, avg 1µs/call | ||
85 | |||||
86 | =pod | ||||
87 | |||||
88 | =head2 tidy | ||||
89 | |||||
90 | C<tidy> is a convenience method for removing unneeded whitespace. | ||||
91 | |||||
92 | Specifically, it removes any whitespace from the end of a line. | ||||
93 | |||||
94 | Note that this B<doesn't> include POD, where you may well need | ||||
95 | to keep certain types of whitespace. The entire POD chunk lives | ||||
96 | in its own L<PPI::Token::Pod> object. | ||||
97 | |||||
98 | =cut | ||||
99 | |||||
100 | sub tidy { | ||||
101 | $_[0]->{content} =~ s/^\s+?(?>\n)//; | ||||
102 | 1; | ||||
103 | } | ||||
104 | |||||
- - | |||||
109 | ##################################################################### | ||||
110 | # Parsing Methods | ||||
111 | |||||
112 | # Build the class and commit maps | ||||
113 | 2 | 208µs | 2 | 86µs | # spent 47µs (8+40) within PPI::Token::Whitespace::BEGIN@113 which was called:
# once (8µs+40µs) by PPI::Token::BEGIN@39 at line 113 # spent 47µs making 1 call to PPI::Token::Whitespace::BEGIN@113
# spent 40µs making 1 call to vars::import |
114 | # spent 59µs within PPI::Token::Whitespace::BEGIN@114 which was called:
# once (59µs+0s) by PPI::Token::BEGIN@39 at line 150 | ||||
115 | 1 | 400ns | @CLASSMAP = (); | ||
116 | 1 | 100ns | @COMMITMAP = (); | ||
117 | 1 | 900ns | foreach ( | ||
118 | 'a' .. 'u', 'w', 'y', 'z', 'A' .. 'Z', '_' | ||||
119 | ) { | ||||
120 | 51 | 19µs | $COMMITMAP[ord $_] = 'PPI::Token::Word'; | ||
121 | } | ||||
122 | 7 | 2µs | foreach ( qw!; [ ] { } )! ) { $COMMITMAP[ord $_] = 'PPI::Token::Structure' } | ||
123 | 11 | 5µs | foreach ( 0 .. 9 ) { $CLASSMAP[ord $_] = 'Number' } | ||
124 | 10 | 3µs | foreach ( qw{= ? | + > . ! ~ ^} ) { $CLASSMAP[ord $_] = 'Operator' } | ||
125 | 7 | 2µs | foreach ( qw{* $ @ & : %} ) { $CLASSMAP[ord $_] = 'Unknown' } | ||
126 | |||||
127 | # Miscellaneous remainder | ||||
128 | 1 | 300ns | $COMMITMAP[ord '#'] = 'PPI::Token::Comment'; | ||
129 | 1 | 200ns | $COMMITMAP[ord 'v'] = 'PPI::Token::Number::Version'; | ||
130 | 1 | 200ns | $CLASSMAP[ord ','] = 'PPI::Token::Operator'; | ||
131 | 1 | 100ns | $CLASSMAP[ord "'"] = 'Quote::Single'; | ||
132 | 1 | 200ns | $CLASSMAP[ord '"'] = 'Quote::Double'; | ||
133 | 1 | 200ns | $CLASSMAP[ord '`'] = 'QuoteLike::Backtick'; | ||
134 | 1 | 200ns | $CLASSMAP[ord '\\'] = 'Cast'; | ||
135 | 1 | 100ns | $CLASSMAP[ord '_'] = 'Word'; | ||
136 | 1 | 200ns | $CLASSMAP[9] = 'Whitespace'; # A horizontal tab | ||
137 | 1 | 200ns | $CLASSMAP[10] = 'Whitespace'; # A newline | ||
138 | 1 | 100ns | $CLASSMAP[13] = 'Whitespace'; # A carriage return | ||
139 | 1 | 100ns | $CLASSMAP[32] = 'Whitespace'; # A normal space | ||
140 | |||||
141 | # Words (functions and keywords) after which a following / is | ||||
142 | # almost certainly going to be a regex | ||||
143 | 1 | 10µs | %MATCHWORD = map { $_ => 1 } qw{ | ||
144 | split | ||||
145 | if | ||||
146 | unless | ||||
147 | grep | ||||
148 | map | ||||
149 | }; | ||||
150 | 1 | 778µs | 1 | 59µs | } # spent 59µs making 1 call to PPI::Token::Whitespace::BEGIN@114 |
151 | |||||
152 | # spent 272ms (179+92.9) within PPI::Token::Whitespace::__TOKENIZER__on_line_start which was called 15087 times, avg 18µs/call:
# 14943 times (177ms+92.5ms) by PPI::Tokenizer::_process_next_line at line 499 of PPI/Tokenizer.pm, avg 18µs/call
# 144 times (1.80ms+440µs) by PPI::Token::BOM::__TOKENIZER__on_line_start at line 90 of PPI/Token/BOM.pm, avg 16µs/call | ||||
153 | 15087 | 3.29ms | my $t = $_[1]; | ||
154 | 15087 | 3.81ms | my $line = $t->{line}; | ||
155 | |||||
156 | # Can we classify the entire line in one go | ||||
157 | 15087 | 147ms | 45838 | 28.0ms | if ( $line =~ /^\s*$/ ) { # spent 28.0ms making 45838 calls to PPI::Token::Whitespace::CORE:match, avg 611ns/call |
158 | # A whitespace line | ||||
159 | 3724 | 4.52ms | 3724 | 34.9ms | $t->_new_token( 'Whitespace', $line ); # spent 34.9ms making 3724 calls to PPI::Tokenizer::_new_token, avg 9µs/call |
160 | 3724 | 26.0ms | return 0; | ||
161 | |||||
162 | } elsif ( $line =~ /^\s*#/ ) { | ||||
163 | # A comment line | ||||
164 | 1668 | 2.00ms | 1668 | 26.3ms | $t->_new_token( 'Comment', $line ); # spent 26.3ms making 1668 calls to PPI::Tokenizer::_new_token, avg 16µs/call |
165 | 1668 | 1.75ms | 1668 | 3.76ms | $t->_finalize_token; # spent 3.76ms making 1668 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call |
166 | 1668 | 7.48ms | return 0; | ||
167 | |||||
168 | } elsif ( $line =~ /^=(\w+)/ ) { | ||||
169 | # A Pod tag... change to pod mode | ||||
170 | 2 | 2µs | 2 | 24µs | $t->_new_token( 'Pod', $line ); # spent 24µs making 2 calls to PPI::Tokenizer::_new_token, avg 12µs/call |
171 | 2 | 2µs | if ( $1 eq 'cut' ) { | ||
172 | # This is an error, but one we'll ignore | ||||
173 | # Don't go into Pod mode, since =cut normally | ||||
174 | # signals the end of Pod mode | ||||
175 | } else { | ||||
176 | 2 | 1µs | $t->{class} = 'PPI::Token::Pod'; | ||
177 | } | ||||
178 | 2 | 4µs | return 0; | ||
179 | |||||
180 | } elsif ( $line =~ /^use v6\-alpha\;/ ) { | ||||
181 | # Indicates a Perl 6 block. Make the initial | ||||
182 | # implementation just suck in the entire rest of the | ||||
183 | # file. | ||||
184 | my @perl6 = (); | ||||
185 | while ( 1 ) { | ||||
186 | my $line6 = $t->_get_line; | ||||
187 | last unless defined $line6; | ||||
188 | push @perl6, $line6; | ||||
189 | } | ||||
190 | push @{ $t->{perl6} }, join '', @perl6; | ||||
191 | |||||
192 | # We only sucked in the block, we don't actually do | ||||
193 | # anything to the "use v6..." line. So return as if | ||||
194 | # we didn't find anything at all. | ||||
195 | return 1; | ||||
196 | } | ||||
197 | |||||
198 | 9693 | 43.2ms | 1; | ||
199 | } | ||||
200 | |||||
201 | # spent 2.23s (482ms+1.75) within PPI::Token::Whitespace::__TOKENIZER__on_char which was called 140043 times, avg 16µs/call:
# 106218 times (325ms+1.54s) by PPI::Tokenizer::_process_next_char at line 554 of PPI/Tokenizer.pm, avg 18µs/call
# 14291 times (79.0ms+-79.0ms) by PPI::Token::Word::__TOKENIZER__commit at line 539 of PPI/Token/Word.pm, avg 0s/call
# 7437 times (24.5ms+194ms) by PPI::Token::Operator::__TOKENIZER__on_char at line 112 of PPI/Token/Operator.pm, avg 29µs/call
# 7245 times (38.3ms+50.2ms) by PPI::Token::Symbol::__TOKENIZER__on_char at line 216 of PPI/Token/Symbol.pm, avg 12µs/call
# 3157 times (9.34ms+18.3ms) by PPI::Token::Structure::__TOKENIZER__on_char at line 70 of PPI/Token/Structure.pm, avg 9µs/call
# 832 times (2.83ms+8.42ms) by PPI::Token::Number::__TOKENIZER__on_char at line 125 of PPI/Token/Number.pm, avg 14µs/call
# 509 times (2.10ms+7.38ms) by PPI::Token::Symbol::__TOKENIZER__on_char at line 174 of PPI/Token/Symbol.pm, avg 19µs/call
# 148 times (689µs+6.10ms) by PPI::Token::Number::Float::__TOKENIZER__on_char at line 108 of PPI/Token/Number/Float.pm, avg 46µs/call
# 85 times (360µs+1.43ms) by PPI::Token::Unknown::__TOKENIZER__on_char at line 179 of PPI/Token/Unknown.pm, avg 21µs/call
# 61 times (137µs+0s) by PPI::Token::Cast::__TOKENIZER__on_char at line 51 of PPI/Token/Cast.pm, avg 2µs/call
# 30 times (101µs+217µs) by PPI::Token::Magic::__TOKENIZER__on_char at line 228 of PPI/Token/Magic.pm, avg 11µs/call
# 22 times (87µs+343µs) by PPI::Token::Unknown::__TOKENIZER__on_char at line 216 of PPI/Token/Unknown.pm, avg 20µs/call
# 3 times (9µs+17µs) by PPI::Token::ArrayIndex::__TOKENIZER__on_char at line 56 of PPI/Token/ArrayIndex.pm, avg 8µs/call
# 2 times (4µs+0s) by PPI::Token::DashedWord::__TOKENIZER__on_char at line 95 of PPI/Token/DashedWord.pm, avg 2µs/call
# once (4µs+15µs) by PPI::Token::Unknown::__TOKENIZER__on_char at line 150 of PPI/Token/Unknown.pm
# once (4µs+15µs) by PPI::Token::Magic::__TOKENIZER__on_char at line 170 of PPI/Token/Magic.pm
# once (2µs+0s) by PPI::Token::HereDoc::__TOKENIZER__on_char at line 218 of PPI/Token/HereDoc.pm | ||||
202 | 140043 | 21.6ms | my $t = $_[1]; | ||
203 | 140043 | 61.9ms | my $char = ord substr $t->{line}, $t->{line_cursor}, 1; | ||
204 | |||||
205 | # Do we definitely know what something is? | ||||
206 | 140043 | 168ms | 29239 | 1.78s | return $COMMITMAP[$char]->__TOKENIZER__commit($t) if $COMMITMAP[$char]; # spent 1.42s making 15152 calls to PPI::Token::Word::__TOKENIZER__commit, avg 94µs/call
# spent 313ms making 13365 calls to PPI::Token::Structure::__TOKENIZER__commit, avg 23µs/call
# spent 30.1ms making 336 calls to PPI::Token::Number::Version::__TOKENIZER__commit, avg 90µs/call
# spent 7.72ms making 242 calls to PPI::Token::Comment::__TOKENIZER__commit, avg 32µs/call
# spent 1.31ms making 144 calls to PPI::Element::DESTROY, avg 9µs/call |
207 | |||||
208 | # Handle the simple option first | ||||
209 | 110948 | 419ms | return $CLASSMAP[$char] if $CLASSMAP[$char]; | ||
210 | |||||
211 | 5077 | 2.14ms | if ( $char == 40 ) { # $char eq '(' | ||
212 | # Finalise any whitespace token... | ||||
213 | 3157 | 5.58ms | 2504 | 3.60ms | $t->_finalize_token if $t->{token}; # spent 2.71ms making 1252 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 890µs making 1252 calls to PPI::Util::TRUE, avg 711ns/call |
214 | |||||
215 | # Is this the beginning of a sub prototype? | ||||
216 | # We are a sub prototype IF | ||||
217 | # 1. The previous significant token is a bareword. | ||||
218 | # 2. The one before that is the word 'sub'. | ||||
219 | # 3. The one before that is a 'structure' | ||||
220 | |||||
221 | # Get the three previous significant tokens | ||||
222 | 3157 | 3.74ms | 3157 | 86.2ms | my $tokens = $t->_previous_significant_tokens(3); # spent 86.2ms making 3157 calls to PPI::Tokenizer::_previous_significant_tokens, avg 27µs/call |
223 | 3157 | 1.22ms | if ( $tokens ) { | ||
224 | # A normal subroutine declaration | ||||
225 | 3157 | 840µs | my $p1 = $tokens->[1]; | ||
226 | 3157 | 452µs | my $p2 = $tokens->[2]; | ||
227 | 3157 | 19.7ms | 6226 | 6.96ms | if ( # spent 6.78ms making 6110 calls to UNIVERSAL::isa, avg 1µs/call
# spent 178µs making 116 calls to PPI::Token::content, avg 2µs/call |
228 | $tokens->[0]->isa('PPI::Token::Word') | ||||
229 | and | ||||
230 | $p1->isa('PPI::Token::Word') | ||||
231 | and | ||||
232 | $p1->content eq 'sub' | ||||
233 | and ( | ||||
234 | $p2->isa('PPI::Token::Structure') | ||||
235 | or ( | ||||
236 | $p2->isa('PPI::Token::Whitespace') | ||||
237 | and | ||||
238 | $p2->content eq '' | ||||
239 | ) | ||||
240 | ) | ||||
241 | ) { | ||||
242 | # This is a sub prototype | ||||
243 | return 'Prototype'; | ||||
244 | } | ||||
245 | |||||
246 | # A prototyped anonymous subroutine | ||||
247 | 3157 | 911µs | my $p0 = $tokens->[0]; | ||
248 | 3157 | 22.2ms | 6110 | 6.81ms | if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' # spent 4.52ms making 2953 calls to PPI::Token::content, avg 2µs/call
# spent 2.29ms making 3157 calls to UNIVERSAL::isa, avg 726ns/call |
249 | # Maybe it's invoking a method named 'sub' | ||||
250 | and not ( $p1 and $p1->isa('PPI::Token::Operator') and $p1->content eq '->') | ||||
251 | ) { | ||||
252 | return 'Prototype'; | ||||
253 | } | ||||
254 | } | ||||
255 | |||||
256 | # This is a normal open bracket | ||||
257 | 3157 | 7.89ms | return 'Structure'; | ||
258 | |||||
259 | } elsif ( $char == 60 ) { # $char eq '<' | ||||
260 | # Finalise any whitespace token... | ||||
261 | 51 | 220µs | 102 | 139µs | $t->_finalize_token if $t->{token}; # spent 105µs making 51 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 34µs making 51 calls to PPI::Util::TRUE, avg 675ns/call |
262 | |||||
263 | # This is either "less than" or "readline quote-like" | ||||
264 | # Do some context stuff to guess which. | ||||
265 | 51 | 84µs | 51 | 577µs | my $prev = $t->_last_significant_token; # spent 577µs making 51 calls to PPI::Tokenizer::_last_significant_token, avg 11µs/call |
266 | |||||
267 | # The most common group of less-thans are used like | ||||
268 | # $foo < $bar | ||||
269 | # 1 < $bar | ||||
270 | # $#foo < $bar | ||||
271 | 51 | 252µs | 51 | 62µs | return 'Operator' if $prev->isa('PPI::Token::Symbol'); # spent 62µs making 51 calls to UNIVERSAL::isa, avg 1µs/call |
272 | 23 | 90µs | 23 | 28µs | return 'Operator' if $prev->isa('PPI::Token::Magic'); # spent 28µs making 23 calls to UNIVERSAL::isa, avg 1µs/call |
273 | 23 | 86µs | 23 | 27µs | return 'Operator' if $prev->isa('PPI::Token::Number'); # spent 27µs making 23 calls to UNIVERSAL::isa, avg 1µs/call |
274 | 20 | 83µs | 20 | 33µs | return 'Operator' if $prev->isa('PPI::Token::ArrayIndex'); # spent 33µs making 20 calls to UNIVERSAL::isa, avg 2µs/call |
275 | |||||
276 | # If it is <<... it's a here-doc instead | ||||
277 | 20 | 23µs | my $next_char = substr( $t->{line}, $t->{line_cursor} + 1, 1 ); | ||
278 | 20 | 9µs | if ( $next_char eq '<' ) { | ||
279 | return 'Operator'; | ||||
280 | } | ||||
281 | |||||
282 | # The most common group of readlines are used like | ||||
283 | # while ( <...> ) | ||||
284 | # while <>; | ||||
285 | 19 | 29µs | 19 | 27µs | my $prec = $prev->content; # spent 27µs making 19 calls to PPI::Token::content, avg 1µs/call |
286 | 19 | 69µs | 19 | 17µs | if ( $prev->isa('PPI::Token::Structure') and $prec eq '(' ) { # spent 17µs making 19 calls to UNIVERSAL::isa, avg 884ns/call |
287 | return 'QuoteLike::Readline'; | ||||
288 | } | ||||
289 | 18 | 70µs | 18 | 18µs | if ( $prev->isa('PPI::Token::Word') and $prec eq 'while' ) { # spent 18µs making 18 calls to UNIVERSAL::isa, avg 1µs/call |
290 | return 'QuoteLike::Readline'; | ||||
291 | } | ||||
292 | 18 | 83µs | 18 | 18µs | if ( $prev->isa('PPI::Token::Operator') and $prec eq '=' ) { # spent 18µs making 18 calls to UNIVERSAL::isa, avg 1µs/call |
293 | return 'QuoteLike::Readline'; | ||||
294 | } | ||||
295 | 14 | 54µs | 14 | 13µs | if ( $prev->isa('PPI::Token::Operator') and $prec eq ',' ) { # spent 13µs making 14 calls to UNIVERSAL::isa, avg 936ns/call |
296 | return 'QuoteLike::Readline'; | ||||
297 | } | ||||
298 | |||||
299 | 14 | 46µs | 14 | 10µs | if ( $prev->isa('PPI::Token::Structure') and $prec eq '}' ) { # spent 10µs making 14 calls to UNIVERSAL::isa, avg 714ns/call |
300 | # Could go either way... do a regex check | ||||
301 | # $foo->{bar} < 2; | ||||
302 | # grep { .. } <foo>; | ||||
303 | 1 | 2µs | my $line = substr( $t->{line}, $t->{line_cursor} ); | ||
304 | 1 | 25µs | 1 | 17µs | if ( $line =~ /^<(?!\d)\w+>/ ) { # spent 17µs making 1 call to PPI::Token::Whitespace::CORE:match |
305 | # Almost definitely readline | ||||
306 | return 'QuoteLike::Readline'; | ||||
307 | } | ||||
308 | } | ||||
309 | |||||
310 | # Otherwise, we guess operator, which has been the default up | ||||
311 | # until this more comprehensive section was created. | ||||
312 | 14 | 39µs | return 'Operator'; | ||
313 | |||||
314 | } elsif ( $char == 47 ) { # $char eq '/' | ||||
315 | # Finalise any whitespace token... | ||||
316 | 1 | 7µs | 2 | 3µs | $t->_finalize_token if $t->{token}; # spent 2µs making 1 call to PPI::Tokenizer::_finalize_token
# spent 900ns making 1 call to PPI::Util::TRUE |
317 | |||||
318 | # This is either a "divided by" or a "start regex" | ||||
319 | # Do some context stuff to guess ( ack ) which. | ||||
320 | # Hopefully the guess will be good enough. | ||||
321 | 1 | 2µs | 1 | 11µs | my $prev = $t->_last_significant_token; # spent 11µs making 1 call to PPI::Tokenizer::_last_significant_token |
322 | 1 | 1µs | 1 | 2µs | my $prec = $prev->content; # spent 2µs making 1 call to PPI::Token::content |
323 | |||||
324 | # Most times following an operator, we are a regex. | ||||
325 | # This includes cases such as: | ||||
326 | # , - As an argument in a list | ||||
327 | # .. - The second condition in a flip flop | ||||
328 | # =~ - A bound regex | ||||
329 | # !~ - Ditto | ||||
330 | 1 | 12µs | 1 | 1µs | return 'Regexp::Match' if $prev->isa('PPI::Token::Operator'); # spent 1µs making 1 call to UNIVERSAL::isa |
331 | |||||
332 | # After a symbol | ||||
333 | 1 | 12µs | 1 | 1µs | return 'Operator' if $prev->isa('PPI::Token::Symbol'); # spent 1µs making 1 call to UNIVERSAL::isa |
334 | 1 | 100ns | if ( $prec eq ']' and $prev->isa('PPI::Token::Structure') ) { | ||
335 | return 'Operator'; | ||||
336 | } | ||||
337 | |||||
338 | # After another number | ||||
339 | 1 | 5µs | 1 | 1µs | return 'Operator' if $prev->isa('PPI::Token::Number'); # spent 1µs making 1 call to UNIVERSAL::isa |
340 | |||||
341 | # After going into scope/brackets | ||||
342 | 1 | 11µs | 1 | 900ns | if ( # spent 900ns making 1 call to UNIVERSAL::isa |
343 | $prev->isa('PPI::Token::Structure') | ||||
344 | and ( | ||||
345 | $prec eq '(' | ||||
346 | or | ||||
347 | $prec eq '{' | ||||
348 | or | ||||
349 | $prec eq ';' | ||||
350 | ) | ||||
351 | ) { | ||||
352 | return 'Regexp::Match'; | ||||
353 | } | ||||
354 | |||||
355 | # Functions and keywords | ||||
356 | 1 | 8µs | 1 | 1µs | if ( # spent 1µs making 1 call to UNIVERSAL::isa |
357 | $MATCHWORD{$prec} | ||||
358 | and | ||||
359 | $prev->isa('PPI::Token::Word') | ||||
360 | ) { | ||||
361 | return 'Regexp::Match'; | ||||
362 | } | ||||
363 | |||||
364 | # Or as the very first thing in a file | ||||
365 | return 'Regexp::Match' if $prec eq ''; | ||||
366 | |||||
367 | # What about the char after the slash? There are some things | ||||
368 | # that would be highly illogical to see if it's an operator. | ||||
369 | my $next_char = substr $t->{line}, $t->{line_cursor} + 1, 1; | ||||
370 | if ( defined $next_char and length $next_char ) { | ||||
371 | if ( $next_char =~ /(?:\^|\[|\\)/ ) { | ||||
372 | return 'Regexp::Match'; | ||||
373 | } | ||||
374 | } | ||||
375 | |||||
376 | # Otherwise... erm... assume operator? | ||||
377 | # Add more tests here as potential cases come to light | ||||
378 | return 'Operator'; | ||||
379 | |||||
380 | } elsif ( $char == 120 ) { # $char eq 'x' | ||||
381 | # Handle an arcane special case where "string"x10 means the x is an operator. | ||||
382 | # String in this case means ::Single, ::Double or ::Execute, or the operator versions of the same. | ||||
383 | 2 | 2µs | my $nextchar = substr $t->{line}, $t->{line_cursor} + 1, 1; | ||
384 | 2 | 2µs | 2 | 13µs | my $prev = $t->_previous_significant_tokens(1); # spent 13µs making 2 calls to PPI::Tokenizer::_previous_significant_tokens, avg 6µs/call |
385 | 2 | 2µs | $prev = ref $prev->[0]; | ||
386 | 2 | 6µs | 2 | 1µs | if ( $nextchar =~ /\d/ and $prev ) { # spent 1µs making 2 calls to PPI::Token::Whitespace::CORE:match, avg 500ns/call |
387 | if ( $prev =~ /::Quote::(?:Operator)?(?:Single|Double|Execute)$/ ) { | ||||
388 | return 'Operator'; | ||||
389 | } | ||||
390 | } | ||||
391 | |||||
392 | # Otherwise, commit like a normal bareword | ||||
393 | 2 | 7µs | 2 | 127µs | return PPI::Token::Word->__TOKENIZER__commit($t); # spent 127µs making 2 calls to PPI::Token::Word::__TOKENIZER__commit, avg 63µs/call |
394 | |||||
395 | } elsif ( $char == 45 ) { # $char eq '-' | ||||
396 | # Look for an obvious operator operand context | ||||
397 | 1866 | 2.78ms | 1866 | 34.6ms | my $context = $t->_opcontext; # spent 34.6ms making 1866 calls to PPI::Tokenizer::_opcontext, avg 19µs/call |
398 | 1866 | 4.16ms | if ( $context eq 'operator' ) { | ||
399 | return 'Operator'; | ||||
400 | } else { | ||||
401 | # More logic needed | ||||
402 | 119 | 245µs | return 'Unknown'; | ||
403 | } | ||||
404 | |||||
405 | } elsif ( $char >= 128 ) { # Outside ASCII | ||||
406 | return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $t =~ /\w/; | ||||
407 | return 'Whitespace' if $t =~ /\s/; | ||||
408 | } | ||||
409 | |||||
410 | |||||
411 | # All the whitespaces are covered, so what to do | ||||
412 | ### For now, die | ||||
413 | PPI::Exception->throw("Encountered unexpected character '$char'"); | ||||
414 | } | ||||
415 | |||||
416 | # spent 94.4ms (67.0+27.4) within PPI::Token::Whitespace::__TOKENIZER__on_line_end which was called 9549 times, avg 10µs/call:
# 9549 times (67.0ms+27.4ms) by PPI::Tokenizer::_process_next_line at line 518 of PPI/Tokenizer.pm, avg 10µs/call | ||||
417 | 9549 | 73.6ms | 19098 | 27.4ms | $_[1]->_finalize_token if $_[1]->{token}; # spent 20.9ms making 9549 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 6.50ms making 9549 calls to PPI::Util::TRUE, avg 681ns/call |
418 | } | ||||
419 | |||||
420 | 1 | 2µs | 1; | ||
421 | |||||
422 | =pod | ||||
423 | |||||
424 | =head1 SUPPORT | ||||
425 | |||||
426 | See the L<support section|PPI/SUPPORT> in the main module. | ||||
427 | |||||
428 | =head1 AUTHOR | ||||
429 | |||||
430 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> | ||||
431 | |||||
432 | =head1 COPYRIGHT | ||||
433 | |||||
434 | Copyright 2001 - 2011 Adam Kennedy. | ||||
435 | |||||
436 | This program is free software; you can redistribute | ||||
437 | it and/or modify it under the same terms as Perl itself. | ||||
438 | |||||
439 | The full text of the license can be found in the | ||||
440 | LICENSE file included with this module. | ||||
441 | |||||
442 | =cut | ||||
# spent 28.0ms within PPI::Token::Whitespace::CORE:match which was called 45841 times, avg 611ns/call:
# 45838 times (28.0ms+0s) by PPI::Token::Whitespace::__TOKENIZER__on_line_start at line 157, avg 611ns/call
# 2 times (1µs+0s) by PPI::Token::Whitespace::__TOKENIZER__on_char at line 386, avg 500ns/call
# once (17µs+0s) by PPI::Token::Whitespace::__TOKENIZER__on_char at line 304 |