| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPI/Token/Whitespace.pm |
| Statements | Executed 761603 statements in 1.43s |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 140043 | 17 | 13 | 482ms | 2.23s | PPI::Token::Whitespace::__TOKENIZER__on_char (recurses: max depth 1, inclusive time 164ms) |
| 15087 | 2 | 2 | 179ms | 272ms | PPI::Token::Whitespace::__TOKENIZER__on_line_start |
| 115422 | 17 | 6 | 119ms | 119ms | PPI::Token::Whitespace::significant |
| 9549 | 1 | 1 | 67.0ms | 94.4ms | PPI::Token::Whitespace::__TOKENIZER__on_line_end |
| 45841 | 3 | 1 | 28.0ms | 28.0ms | PPI::Token::Whitespace::CORE:match (opcode) |
| 144 | 1 | 1 | 1.60ms | 3.03ms | PPI::Token::Whitespace::null |
| 1 | 1 | 1 | 59µs | 59µs | PPI::Token::Whitespace::BEGIN@114 |
| 1 | 1 | 1 | 12µs | 24µs | PPI::Token::Whitespace::BEGIN@44 |
| 1 | 1 | 1 | 8µs | 8µs | PPI::Token::Whitespace::BEGIN@49 |
| 1 | 1 | 1 | 8µs | 47µs | PPI::Token::Whitespace::BEGIN@113 |
| 1 | 1 | 1 | 7µs | 42µs | PPI::Token::Whitespace::BEGIN@48 |
| 1 | 1 | 1 | 3µs | 3µs | PPI::Token::Whitespace::BEGIN@45 |
| 1 | 1 | 1 | 3µs | 3µs | PPI::Token::Whitespace::BEGIN@46 |
| 0 | 0 | 0 | 0s | 0s | PPI::Token::Whitespace::tidy |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | package PPI::Token::Whitespace; | ||||
| 2 | |||||
| 3 | =pod | ||||
| 4 | |||||
| 5 | =head1 NAME | ||||
| 6 | |||||
| 7 | PPI::Token::Whitespace - Tokens representing ordinary white space | ||||
| 8 | |||||
| 9 | =head1 INHERITANCE | ||||
| 10 | |||||
| 11 | PPI::Token::Whitespace | ||||
| 12 | isa PPI::Token | ||||
| 13 | isa PPI::Element | ||||
| 14 | |||||
| 15 | =head1 DESCRIPTION | ||||
| 16 | |||||
| 17 | As a full "round-trip" parser, PPI records every last byte in a | ||||
| 18 | file and ensures that it is included in the L<PPI::Document> object. | ||||
| 19 | |||||
| 20 | This even includes whitespace. In fact, Perl documents are seen | ||||
| 21 | as "floating in a sea of whitespace", and thus any document will | ||||
| 22 | contain vast quantities of C<PPI::Token::Whitespace> objects. | ||||
| 23 | |||||
| 24 | For the most part, you shouldn't notice them. Or at least, you | ||||
| 25 | shouldn't B<have> to notice them. | ||||
| 26 | |||||
| 27 | This means doing things like consistently using the "S for significant" | ||||
| 28 | series of L<PPI::Node> and L<PPI::Element> methods to do things. | ||||
| 29 | |||||
| 30 | If you want the nth child element, you should be using C<schild> rather | ||||
| 31 | than C<child>, and likewise C<snext_sibling>, C<sprevious_sibling>, and | ||||
| 32 | so on and so forth. | ||||
| 33 | |||||
| 34 | =head1 METHODS | ||||
| 35 | |||||
| 36 | Again, for the most part you should really B<not> need to do anything | ||||
| 37 | very significant with whitespace. | ||||
| 38 | |||||
| 39 | But there are a couple of convenience methods provided, beyond those | ||||
| 40 | provided by the parent L<PPI::Token> and L<PPI::Element> classes. | ||||
| 41 | |||||
| 42 | =cut | ||||
| 43 | |||||
| 44 | 2 | 19µs | 2 | 36µs | # spent 24µs (12+12) within PPI::Token::Whitespace::BEGIN@44 which was called:
# once (12µs+12µs) by PPI::Token::BEGIN@39 at line 44 # spent 24µs making 1 call to PPI::Token::Whitespace::BEGIN@44
# spent 12µs making 1 call to strict::import |
| 45 | 2 | 16µs | 1 | 3µs | # spent 3µs within PPI::Token::Whitespace::BEGIN@45 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@39 at line 45 # spent 3µs making 1 call to PPI::Token::Whitespace::BEGIN@45 |
| 46 | 2 | 18µs | 1 | 3µs | # spent 3µs within PPI::Token::Whitespace::BEGIN@46 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@39 at line 46 # spent 3µs making 1 call to PPI::Token::Whitespace::BEGIN@46 |
| 47 | |||||
| 48 | 2 | 29µs | 2 | 77µs | # spent 42µs (7+35) within PPI::Token::Whitespace::BEGIN@48 which was called:
# once (7µs+35µs) by PPI::Token::BEGIN@39 at line 48 # spent 42µs making 1 call to PPI::Token::Whitespace::BEGIN@48
# spent 35µs making 1 call to vars::import |
| 49 | # spent 8µs within PPI::Token::Whitespace::BEGIN@49 which was called:
# once (8µs+0s) by PPI::Token::BEGIN@39 at line 52 | ||||
| 50 | 1 | 400ns | $VERSION = '1.215'; | ||
| 51 | 1 | 11µs | @ISA = 'PPI::Token'; | ||
| 52 | 1 | 104µs | 1 | 8µs | } # spent 8µs making 1 call to PPI::Token::Whitespace::BEGIN@49 |
| 53 | |||||
| 54 | =pod | ||||
| 55 | |||||
| 56 | =head2 null | ||||
| 57 | |||||
| 58 | Because L<PPI> sees documents as sitting on a sort of substrate made of | ||||
| 59 | whitespace, there are a couple of corner cases that get particularly | ||||
| 60 | nasty if they don't find whitespace in certain places. | ||||
| 61 | |||||
| 62 | Imagine walking down the beach to go into the ocean, and then quite | ||||
| 63 | unexpectedly falling off the side of the planet. Well it's somewhat | ||||
| 64 | equivalent to that, including the whole screaming death bit. | ||||
| 65 | |||||
| 66 | The C<null> method is a convenience provided to get some internals | ||||
| 67 | out of some of these corner cases. | ||||
| 68 | |||||
| 69 | Specifically it creates a whitespace token that represents nothing, | ||||
| 70 | or at least the null string C<''>. It's a handy way to have some | ||||
| 71 | "whitespace" right where you need it, without having to have any | ||||
| 72 | actual characters. | ||||
| 73 | |||||
| 74 | =cut | ||||
| 75 | |||||
| 76 | 1 | 300ns | my $null = undef; | ||
| 77 | |||||
| 78 | # spent 3.03ms (1.60+1.44) within PPI::Token::Whitespace::null which was called 144 times, avg 21µs/call:
# 144 times (1.60ms+1.44ms) by PPI::Tokenizer::_previous_significant_tokens at line 707 of PPI/Tokenizer.pm, avg 21µs/call | ||||
| 79 | 144 | 1.02ms | 144 | 224µs | $null ||= $_[0]->new(''); # spent 216µs making 143 calls to PPI::Util::TRUE, avg 2µs/call
# spent 8µs making 1 call to PPI::Token::new |
| 80 | 144 | 2.17ms | 144 | 1.21ms | Clone::clone($null); # spent 1.21ms making 144 calls to Clone::clone, avg 8µs/call |
| 81 | } | ||||
| 82 | |||||
| 83 | ### XS -> PPI/XS.xs:_PPI_Token_Whitespace__significant 0.900+ | ||||
| 84 | 115422 | 368ms | # spent 119ms within PPI::Token::Whitespace::significant which was called 115422 times, avg 1µs/call:
# 27638 times (29.5ms+0s) by PPI::Lexer::_lex_statement at line 608 of PPI/Lexer.pm, avg 1µs/call
# 25219 times (26.7ms+0s) by PPI::Element::snext_sibling at line 370 of PPI/Element.pm, avg 1µs/call
# 14524 times (15.5ms+0s) by PPI::Lexer::_lex_structure at line 1321 of PPI/Lexer.pm, avg 1µs/call
# 13592 times (13.8ms+0s) by PPI::Tokenizer::_previous_significant_tokens at line 699 of PPI/Tokenizer.pm, avg 1µs/call
# 9921 times (10.0ms+0s) by PPI::Node::schild at line 282 of PPI/Node.pm, avg 1µs/call
# 5893 times (5.53ms+0s) by PPI::Node::schildren at line 232 of PPI/Node.pm, avg 939ns/call
# 5891 times (5.49ms+0s) by PPI::Node::schildren at line 229 of PPI/Node.pm, avg 933ns/call
# 5248 times (5.48ms+0s) by PPI::Lexer::_lex_document at line 272 of PPI/Lexer.pm, avg 1µs/call
# 4232 times (3.87ms+0s) by PPI::Statement::Variable::type at line 67 of PPI/Statement/Variable.pm, avg 915ns/call
# 1022 times (969µs+0s) by PPI::Lexer::_statement at line 501 of PPI/Lexer.pm, avg 948ns/call
# 981 times (1.10ms+0s) by PPI::Lexer::_statement at line 543 of PPI/Lexer.pm, avg 1µs/call
# 385 times (386µs+0s) by PPI::Node::schild at line 277 of PPI/Node.pm, avg 1µs/call
# 270 times (309µs+0s) by PPI::Element::sprevious_sibling at line 418 of PPI/Element.pm, avg 1µs/call
# 205 times (209µs+0s) by Perl::Critic::Policy::ControlStructures::ProhibitMutatingListFunctions::_has_topic_side_effect at line 121 of Perl/Critic/Policy/ControlStructures/ProhibitMutatingListFunctions.pm, avg 1µs/call
# 190 times (202µs+0s) by PPI::Lexer::_curly at line 1264 of PPI/Lexer.pm, avg 1µs/call
# 159 times (161µs+0s) by PPI::Lexer::_statement at line 430 of PPI/Lexer.pm, avg 1µs/call
# 52 times (54µs+0s) by PPI::Tokenizer::_last_significant_token at line 680 of PPI/Tokenizer.pm, avg 1µs/call | ||
| 85 | |||||
| 86 | =pod | ||||
| 87 | |||||
| 88 | =head2 tidy | ||||
| 89 | |||||
| 90 | C<tidy> is a convenience method for removing unneeded whitespace. | ||||
| 91 | |||||
| 92 | Specifically, it removes any whitespace from the end of a line. | ||||
| 93 | |||||
| 94 | Note that this B<doesn't> include POD, where you may well need | ||||
| 95 | to keep certain types of whitespace. The entire POD chunk lives | ||||
| 96 | in its own L<PPI::Token::Pod> object. | ||||
| 97 | |||||
| 98 | =cut | ||||
| 99 | |||||
| 100 | sub tidy { | ||||
| 101 | $_[0]->{content} =~ s/^\s+?(?>\n)//; | ||||
| 102 | 1; | ||||
| 103 | } | ||||
| 104 | |||||
| - - | |||||
| 109 | ##################################################################### | ||||
| 110 | # Parsing Methods | ||||
| 111 | |||||
| 112 | # Build the class and commit maps | ||||
| 113 | 2 | 208µs | 2 | 86µs | # spent 47µs (8+40) within PPI::Token::Whitespace::BEGIN@113 which was called:
# once (8µs+40µs) by PPI::Token::BEGIN@39 at line 113 # spent 47µs making 1 call to PPI::Token::Whitespace::BEGIN@113
# spent 40µs making 1 call to vars::import |
| 114 | # spent 59µs within PPI::Token::Whitespace::BEGIN@114 which was called:
# once (59µs+0s) by PPI::Token::BEGIN@39 at line 150 | ||||
| 115 | 1 | 400ns | @CLASSMAP = (); | ||
| 116 | 1 | 100ns | @COMMITMAP = (); | ||
| 117 | 1 | 900ns | foreach ( | ||
| 118 | 'a' .. 'u', 'w', 'y', 'z', 'A' .. 'Z', '_' | ||||
| 119 | ) { | ||||
| 120 | 51 | 19µs | $COMMITMAP[ord $_] = 'PPI::Token::Word'; | ||
| 121 | } | ||||
| 122 | 7 | 2µs | foreach ( qw!; [ ] { } )! ) { $COMMITMAP[ord $_] = 'PPI::Token::Structure' } | ||
| 123 | 11 | 5µs | foreach ( 0 .. 9 ) { $CLASSMAP[ord $_] = 'Number' } | ||
| 124 | 10 | 3µs | foreach ( qw{= ? | + > . ! ~ ^} ) { $CLASSMAP[ord $_] = 'Operator' } | ||
| 125 | 7 | 2µs | foreach ( qw{* $ @ & : %} ) { $CLASSMAP[ord $_] = 'Unknown' } | ||
| 126 | |||||
| 127 | # Miscellaneous remainder | ||||
| 128 | 1 | 300ns | $COMMITMAP[ord '#'] = 'PPI::Token::Comment'; | ||
| 129 | 1 | 200ns | $COMMITMAP[ord 'v'] = 'PPI::Token::Number::Version'; | ||
| 130 | 1 | 200ns | $CLASSMAP[ord ','] = 'PPI::Token::Operator'; | ||
| 131 | 1 | 100ns | $CLASSMAP[ord "'"] = 'Quote::Single'; | ||
| 132 | 1 | 200ns | $CLASSMAP[ord '"'] = 'Quote::Double'; | ||
| 133 | 1 | 200ns | $CLASSMAP[ord '`'] = 'QuoteLike::Backtick'; | ||
| 134 | 1 | 200ns | $CLASSMAP[ord '\\'] = 'Cast'; | ||
| 135 | 1 | 100ns | $CLASSMAP[ord '_'] = 'Word'; | ||
| 136 | 1 | 200ns | $CLASSMAP[9] = 'Whitespace'; # A horizontal tab | ||
| 137 | 1 | 200ns | $CLASSMAP[10] = 'Whitespace'; # A newline | ||
| 138 | 1 | 100ns | $CLASSMAP[13] = 'Whitespace'; # A carriage return | ||
| 139 | 1 | 100ns | $CLASSMAP[32] = 'Whitespace'; # A normal space | ||
| 140 | |||||
| 141 | # Words (functions and keywords) after which a following / is | ||||
| 142 | # almost certainly going to be a regex | ||||
| 143 | 1 | 10µs | %MATCHWORD = map { $_ => 1 } qw{ | ||
| 144 | split | ||||
| 145 | if | ||||
| 146 | unless | ||||
| 147 | grep | ||||
| 148 | map | ||||
| 149 | }; | ||||
| 150 | 1 | 778µs | 1 | 59µs | } # spent 59µs making 1 call to PPI::Token::Whitespace::BEGIN@114 |
| 151 | |||||
| 152 | # spent 272ms (179+92.9) within PPI::Token::Whitespace::__TOKENIZER__on_line_start which was called 15087 times, avg 18µs/call:
# 14943 times (177ms+92.5ms) by PPI::Tokenizer::_process_next_line at line 499 of PPI/Tokenizer.pm, avg 18µs/call
# 144 times (1.80ms+440µs) by PPI::Token::BOM::__TOKENIZER__on_line_start at line 90 of PPI/Token/BOM.pm, avg 16µs/call | ||||
| 153 | 15087 | 3.29ms | my $t = $_[1]; | ||
| 154 | 15087 | 3.81ms | my $line = $t->{line}; | ||
| 155 | |||||
| 156 | # Can we classify the entire line in one go | ||||
| 157 | 15087 | 147ms | 45838 | 28.0ms | if ( $line =~ /^\s*$/ ) { # spent 28.0ms making 45838 calls to PPI::Token::Whitespace::CORE:match, avg 611ns/call |
| 158 | # A whitespace line | ||||
| 159 | 3724 | 4.52ms | 3724 | 34.9ms | $t->_new_token( 'Whitespace', $line ); # spent 34.9ms making 3724 calls to PPI::Tokenizer::_new_token, avg 9µs/call |
| 160 | 3724 | 26.0ms | return 0; | ||
| 161 | |||||
| 162 | } elsif ( $line =~ /^\s*#/ ) { | ||||
| 163 | # A comment line | ||||
| 164 | 1668 | 2.00ms | 1668 | 26.3ms | $t->_new_token( 'Comment', $line ); # spent 26.3ms making 1668 calls to PPI::Tokenizer::_new_token, avg 16µs/call |
| 165 | 1668 | 1.75ms | 1668 | 3.76ms | $t->_finalize_token; # spent 3.76ms making 1668 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call |
| 166 | 1668 | 7.48ms | return 0; | ||
| 167 | |||||
| 168 | } elsif ( $line =~ /^=(\w+)/ ) { | ||||
| 169 | # A Pod tag... change to pod mode | ||||
| 170 | 2 | 2µs | 2 | 24µs | $t->_new_token( 'Pod', $line ); # spent 24µs making 2 calls to PPI::Tokenizer::_new_token, avg 12µs/call |
| 171 | 2 | 2µs | if ( $1 eq 'cut' ) { | ||
| 172 | # This is an error, but one we'll ignore | ||||
| 173 | # Don't go into Pod mode, since =cut normally | ||||
| 174 | # signals the end of Pod mode | ||||
| 175 | } else { | ||||
| 176 | 2 | 1µs | $t->{class} = 'PPI::Token::Pod'; | ||
| 177 | } | ||||
| 178 | 2 | 4µs | return 0; | ||
| 179 | |||||
| 180 | } elsif ( $line =~ /^use v6\-alpha\;/ ) { | ||||
| 181 | # Indicates a Perl 6 block. Make the initial | ||||
| 182 | # implementation just suck in the entire rest of the | ||||
| 183 | # file. | ||||
| 184 | my @perl6 = (); | ||||
| 185 | while ( 1 ) { | ||||
| 186 | my $line6 = $t->_get_line; | ||||
| 187 | last unless defined $line6; | ||||
| 188 | push @perl6, $line6; | ||||
| 189 | } | ||||
| 190 | push @{ $t->{perl6} }, join '', @perl6; | ||||
| 191 | |||||
| 192 | # We only sucked in the block, we don't actually do | ||||
| 193 | # anything to the "use v6..." line. So return as if | ||||
| 194 | # we didn't find anything at all. | ||||
| 195 | return 1; | ||||
| 196 | } | ||||
| 197 | |||||
| 198 | 9693 | 43.2ms | 1; | ||
| 199 | } | ||||
| 200 | |||||
| 201 | # spent 2.23s (482ms+1.75) within PPI::Token::Whitespace::__TOKENIZER__on_char which was called 140043 times, avg 16µs/call:
# 106218 times (325ms+1.54s) by PPI::Tokenizer::_process_next_char at line 554 of PPI/Tokenizer.pm, avg 18µs/call
# 14291 times (79.0ms+-79.0ms) by PPI::Token::Word::__TOKENIZER__commit at line 539 of PPI/Token/Word.pm, avg 0s/call
# 7437 times (24.5ms+194ms) by PPI::Token::Operator::__TOKENIZER__on_char at line 112 of PPI/Token/Operator.pm, avg 29µs/call
# 7245 times (38.3ms+50.2ms) by PPI::Token::Symbol::__TOKENIZER__on_char at line 216 of PPI/Token/Symbol.pm, avg 12µs/call
# 3157 times (9.34ms+18.3ms) by PPI::Token::Structure::__TOKENIZER__on_char at line 70 of PPI/Token/Structure.pm, avg 9µs/call
# 832 times (2.83ms+8.42ms) by PPI::Token::Number::__TOKENIZER__on_char at line 125 of PPI/Token/Number.pm, avg 14µs/call
# 509 times (2.10ms+7.38ms) by PPI::Token::Symbol::__TOKENIZER__on_char at line 174 of PPI/Token/Symbol.pm, avg 19µs/call
# 148 times (689µs+6.10ms) by PPI::Token::Number::Float::__TOKENIZER__on_char at line 108 of PPI/Token/Number/Float.pm, avg 46µs/call
# 85 times (360µs+1.43ms) by PPI::Token::Unknown::__TOKENIZER__on_char at line 179 of PPI/Token/Unknown.pm, avg 21µs/call
# 61 times (137µs+0s) by PPI::Token::Cast::__TOKENIZER__on_char at line 51 of PPI/Token/Cast.pm, avg 2µs/call
# 30 times (101µs+217µs) by PPI::Token::Magic::__TOKENIZER__on_char at line 228 of PPI/Token/Magic.pm, avg 11µs/call
# 22 times (87µs+343µs) by PPI::Token::Unknown::__TOKENIZER__on_char at line 216 of PPI/Token/Unknown.pm, avg 20µs/call
# 3 times (9µs+17µs) by PPI::Token::ArrayIndex::__TOKENIZER__on_char at line 56 of PPI/Token/ArrayIndex.pm, avg 8µs/call
# 2 times (4µs+0s) by PPI::Token::DashedWord::__TOKENIZER__on_char at line 95 of PPI/Token/DashedWord.pm, avg 2µs/call
# once (4µs+15µs) by PPI::Token::Unknown::__TOKENIZER__on_char at line 150 of PPI/Token/Unknown.pm
# once (4µs+15µs) by PPI::Token::Magic::__TOKENIZER__on_char at line 170 of PPI/Token/Magic.pm
# once (2µs+0s) by PPI::Token::HereDoc::__TOKENIZER__on_char at line 218 of PPI/Token/HereDoc.pm | ||||
| 202 | 140043 | 21.6ms | my $t = $_[1]; | ||
| 203 | 140043 | 61.9ms | my $char = ord substr $t->{line}, $t->{line_cursor}, 1; | ||
| 204 | |||||
| 205 | # Do we definitely know what something is? | ||||
| 206 | 140043 | 168ms | 29239 | 1.78s | return $COMMITMAP[$char]->__TOKENIZER__commit($t) if $COMMITMAP[$char]; # spent 1.42s making 15152 calls to PPI::Token::Word::__TOKENIZER__commit, avg 94µs/call
# spent 313ms making 13365 calls to PPI::Token::Structure::__TOKENIZER__commit, avg 23µs/call
# spent 30.1ms making 336 calls to PPI::Token::Number::Version::__TOKENIZER__commit, avg 90µs/call
# spent 7.72ms making 242 calls to PPI::Token::Comment::__TOKENIZER__commit, avg 32µs/call
# spent 1.31ms making 144 calls to PPI::Element::DESTROY, avg 9µs/call |
| 207 | |||||
| 208 | # Handle the simple option first | ||||
| 209 | 110948 | 419ms | return $CLASSMAP[$char] if $CLASSMAP[$char]; | ||
| 210 | |||||
| 211 | 5077 | 2.14ms | if ( $char == 40 ) { # $char eq '(' | ||
| 212 | # Finalise any whitespace token... | ||||
| 213 | 3157 | 5.58ms | 2504 | 3.60ms | $t->_finalize_token if $t->{token}; # spent 2.71ms making 1252 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 890µs making 1252 calls to PPI::Util::TRUE, avg 711ns/call |
| 214 | |||||
| 215 | # Is this the beginning of a sub prototype? | ||||
| 216 | # We are a sub prototype IF | ||||
| 217 | # 1. The previous significant token is a bareword. | ||||
| 218 | # 2. The one before that is the word 'sub'. | ||||
| 219 | # 3. The one before that is a 'structure' | ||||
| 220 | |||||
| 221 | # Get the three previous significant tokens | ||||
| 222 | 3157 | 3.74ms | 3157 | 86.2ms | my $tokens = $t->_previous_significant_tokens(3); # spent 86.2ms making 3157 calls to PPI::Tokenizer::_previous_significant_tokens, avg 27µs/call |
| 223 | 3157 | 1.22ms | if ( $tokens ) { | ||
| 224 | # A normal subroutine declaration | ||||
| 225 | 3157 | 840µs | my $p1 = $tokens->[1]; | ||
| 226 | 3157 | 452µs | my $p2 = $tokens->[2]; | ||
| 227 | 3157 | 19.7ms | 6226 | 6.96ms | if ( # spent 6.78ms making 6110 calls to UNIVERSAL::isa, avg 1µs/call
# spent 178µs making 116 calls to PPI::Token::content, avg 2µs/call |
| 228 | $tokens->[0]->isa('PPI::Token::Word') | ||||
| 229 | and | ||||
| 230 | $p1->isa('PPI::Token::Word') | ||||
| 231 | and | ||||
| 232 | $p1->content eq 'sub' | ||||
| 233 | and ( | ||||
| 234 | $p2->isa('PPI::Token::Structure') | ||||
| 235 | or ( | ||||
| 236 | $p2->isa('PPI::Token::Whitespace') | ||||
| 237 | and | ||||
| 238 | $p2->content eq '' | ||||
| 239 | ) | ||||
| 240 | ) | ||||
| 241 | ) { | ||||
| 242 | # This is a sub prototype | ||||
| 243 | return 'Prototype'; | ||||
| 244 | } | ||||
| 245 | |||||
| 246 | # A prototyped anonymous subroutine | ||||
| 247 | 3157 | 911µs | my $p0 = $tokens->[0]; | ||
| 248 | 3157 | 22.2ms | 6110 | 6.81ms | if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' # spent 4.52ms making 2953 calls to PPI::Token::content, avg 2µs/call
# spent 2.29ms making 3157 calls to UNIVERSAL::isa, avg 726ns/call |
| 249 | # Maybe it's invoking a method named 'sub' | ||||
| 250 | and not ( $p1 and $p1->isa('PPI::Token::Operator') and $p1->content eq '->') | ||||
| 251 | ) { | ||||
| 252 | return 'Prototype'; | ||||
| 253 | } | ||||
| 254 | } | ||||
| 255 | |||||
| 256 | # This is a normal open bracket | ||||
| 257 | 3157 | 7.89ms | return 'Structure'; | ||
| 258 | |||||
| 259 | } elsif ( $char == 60 ) { # $char eq '<' | ||||
| 260 | # Finalise any whitespace token... | ||||
| 261 | 51 | 220µs | 102 | 139µs | $t->_finalize_token if $t->{token}; # spent 105µs making 51 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 34µs making 51 calls to PPI::Util::TRUE, avg 675ns/call |
| 262 | |||||
| 263 | # This is either "less than" or "readline quote-like" | ||||
| 264 | # Do some context stuff to guess which. | ||||
| 265 | 51 | 84µs | 51 | 577µs | my $prev = $t->_last_significant_token; # spent 577µs making 51 calls to PPI::Tokenizer::_last_significant_token, avg 11µs/call |
| 266 | |||||
| 267 | # The most common group of less-thans are used like | ||||
| 268 | # $foo < $bar | ||||
| 269 | # 1 < $bar | ||||
| 270 | # $#foo < $bar | ||||
| 271 | 51 | 252µs | 51 | 62µs | return 'Operator' if $prev->isa('PPI::Token::Symbol'); # spent 62µs making 51 calls to UNIVERSAL::isa, avg 1µs/call |
| 272 | 23 | 90µs | 23 | 28µs | return 'Operator' if $prev->isa('PPI::Token::Magic'); # spent 28µs making 23 calls to UNIVERSAL::isa, avg 1µs/call |
| 273 | 23 | 86µs | 23 | 27µs | return 'Operator' if $prev->isa('PPI::Token::Number'); # spent 27µs making 23 calls to UNIVERSAL::isa, avg 1µs/call |
| 274 | 20 | 83µs | 20 | 33µs | return 'Operator' if $prev->isa('PPI::Token::ArrayIndex'); # spent 33µs making 20 calls to UNIVERSAL::isa, avg 2µs/call |
| 275 | |||||
| 276 | # If it is <<... it's a here-doc instead | ||||
| 277 | 20 | 23µs | my $next_char = substr( $t->{line}, $t->{line_cursor} + 1, 1 ); | ||
| 278 | 20 | 9µs | if ( $next_char eq '<' ) { | ||
| 279 | return 'Operator'; | ||||
| 280 | } | ||||
| 281 | |||||
| 282 | # The most common group of readlines are used like | ||||
| 283 | # while ( <...> ) | ||||
| 284 | # while <>; | ||||
| 285 | 19 | 29µs | 19 | 27µs | my $prec = $prev->content; # spent 27µs making 19 calls to PPI::Token::content, avg 1µs/call |
| 286 | 19 | 69µs | 19 | 17µs | if ( $prev->isa('PPI::Token::Structure') and $prec eq '(' ) { # spent 17µs making 19 calls to UNIVERSAL::isa, avg 884ns/call |
| 287 | return 'QuoteLike::Readline'; | ||||
| 288 | } | ||||
| 289 | 18 | 70µs | 18 | 18µs | if ( $prev->isa('PPI::Token::Word') and $prec eq 'while' ) { # spent 18µs making 18 calls to UNIVERSAL::isa, avg 1µs/call |
| 290 | return 'QuoteLike::Readline'; | ||||
| 291 | } | ||||
| 292 | 18 | 83µs | 18 | 18µs | if ( $prev->isa('PPI::Token::Operator') and $prec eq '=' ) { # spent 18µs making 18 calls to UNIVERSAL::isa, avg 1µs/call |
| 293 | return 'QuoteLike::Readline'; | ||||
| 294 | } | ||||
| 295 | 14 | 54µs | 14 | 13µs | if ( $prev->isa('PPI::Token::Operator') and $prec eq ',' ) { # spent 13µs making 14 calls to UNIVERSAL::isa, avg 936ns/call |
| 296 | return 'QuoteLike::Readline'; | ||||
| 297 | } | ||||
| 298 | |||||
| 299 | 14 | 46µs | 14 | 10µs | if ( $prev->isa('PPI::Token::Structure') and $prec eq '}' ) { # spent 10µs making 14 calls to UNIVERSAL::isa, avg 714ns/call |
| 300 | # Could go either way... do a regex check | ||||
| 301 | # $foo->{bar} < 2; | ||||
| 302 | # grep { .. } <foo>; | ||||
| 303 | 1 | 2µs | my $line = substr( $t->{line}, $t->{line_cursor} ); | ||
| 304 | 1 | 25µs | 1 | 17µs | if ( $line =~ /^<(?!\d)\w+>/ ) { # spent 17µs making 1 call to PPI::Token::Whitespace::CORE:match |
| 305 | # Almost definitely readline | ||||
| 306 | return 'QuoteLike::Readline'; | ||||
| 307 | } | ||||
| 308 | } | ||||
| 309 | |||||
| 310 | # Otherwise, we guess operator, which has been the default up | ||||
| 311 | # until this more comprehensive section was created. | ||||
| 312 | 14 | 39µs | return 'Operator'; | ||
| 313 | |||||
| 314 | } elsif ( $char == 47 ) { # $char eq '/' | ||||
| 315 | # Finalise any whitespace token... | ||||
| 316 | 1 | 7µs | 2 | 3µs | $t->_finalize_token if $t->{token}; # spent 2µs making 1 call to PPI::Tokenizer::_finalize_token
# spent 900ns making 1 call to PPI::Util::TRUE |
| 317 | |||||
| 318 | # This is either a "divided by" or a "start regex" | ||||
| 319 | # Do some context stuff to guess ( ack ) which. | ||||
| 320 | # Hopefully the guess will be good enough. | ||||
| 321 | 1 | 2µs | 1 | 11µs | my $prev = $t->_last_significant_token; # spent 11µs making 1 call to PPI::Tokenizer::_last_significant_token |
| 322 | 1 | 1µs | 1 | 2µs | my $prec = $prev->content; # spent 2µs making 1 call to PPI::Token::content |
| 323 | |||||
| 324 | # Most times following an operator, we are a regex. | ||||
| 325 | # This includes cases such as: | ||||
| 326 | # , - As an argument in a list | ||||
| 327 | # .. - The second condition in a flip flop | ||||
| 328 | # =~ - A bound regex | ||||
| 329 | # !~ - Ditto | ||||
| 330 | 1 | 12µs | 1 | 1µs | return 'Regexp::Match' if $prev->isa('PPI::Token::Operator'); # spent 1µs making 1 call to UNIVERSAL::isa |
| 331 | |||||
| 332 | # After a symbol | ||||
| 333 | 1 | 12µs | 1 | 1µs | return 'Operator' if $prev->isa('PPI::Token::Symbol'); # spent 1µs making 1 call to UNIVERSAL::isa |
| 334 | 1 | 100ns | if ( $prec eq ']' and $prev->isa('PPI::Token::Structure') ) { | ||
| 335 | return 'Operator'; | ||||
| 336 | } | ||||
| 337 | |||||
| 338 | # After another number | ||||
| 339 | 1 | 5µs | 1 | 1µs | return 'Operator' if $prev->isa('PPI::Token::Number'); # spent 1µs making 1 call to UNIVERSAL::isa |
| 340 | |||||
| 341 | # After going into scope/brackets | ||||
| 342 | 1 | 11µs | 1 | 900ns | if ( # spent 900ns making 1 call to UNIVERSAL::isa |
| 343 | $prev->isa('PPI::Token::Structure') | ||||
| 344 | and ( | ||||
| 345 | $prec eq '(' | ||||
| 346 | or | ||||
| 347 | $prec eq '{' | ||||
| 348 | or | ||||
| 349 | $prec eq ';' | ||||
| 350 | ) | ||||
| 351 | ) { | ||||
| 352 | return 'Regexp::Match'; | ||||
| 353 | } | ||||
| 354 | |||||
| 355 | # Functions and keywords | ||||
| 356 | 1 | 8µs | 1 | 1µs | if ( # spent 1µs making 1 call to UNIVERSAL::isa |
| 357 | $MATCHWORD{$prec} | ||||
| 358 | and | ||||
| 359 | $prev->isa('PPI::Token::Word') | ||||
| 360 | ) { | ||||
| 361 | return 'Regexp::Match'; | ||||
| 362 | } | ||||
| 363 | |||||
| 364 | # Or as the very first thing in a file | ||||
| 365 | return 'Regexp::Match' if $prec eq ''; | ||||
| 366 | |||||
| 367 | # What about the char after the slash? There's some things | ||||
| 368 | # that would be highly illogical to see if its an operator. | ||||
| 369 | my $next_char = substr $t->{line}, $t->{line_cursor} + 1, 1; | ||||
| 370 | if ( defined $next_char and length $next_char ) { | ||||
| 371 | if ( $next_char =~ /(?:\^|\[|\\)/ ) { | ||||
| 372 | return 'Regexp::Match'; | ||||
| 373 | } | ||||
| 374 | } | ||||
| 375 | |||||
| 376 | # Otherwise... erm... assume operator? | ||||
| 377 | # Add more tests here as potential cases come to light | ||||
| 378 | return 'Operator'; | ||||
| 379 | |||||
| 380 | } elsif ( $char == 120 ) { # $char eq 'x' | ||||
| 381 | # Handle an arcane special case where "string"x10 means the x is an operator. | ||||
| 382 | # String in this case means ::Single, ::Double or ::Execute, or the operator versions of the same. | ||||
| 383 | 2 | 2µs | my $nextchar = substr $t->{line}, $t->{line_cursor} + 1, 1; | ||
| 384 | 2 | 2µs | 2 | 13µs | my $prev = $t->_previous_significant_tokens(1); # spent 13µs making 2 calls to PPI::Tokenizer::_previous_significant_tokens, avg 6µs/call |
| 385 | 2 | 2µs | $prev = ref $prev->[0]; | ||
| 386 | 2 | 6µs | 2 | 1µs | if ( $nextchar =~ /\d/ and $prev ) { # spent 1µs making 2 calls to PPI::Token::Whitespace::CORE:match, avg 500ns/call |
| 387 | if ( $prev =~ /::Quote::(?:Operator)?(?:Single|Double|Execute)$/ ) { | ||||
| 388 | return 'Operator'; | ||||
| 389 | } | ||||
| 390 | } | ||||
| 391 | |||||
| 392 | # Otherwise, commit like a normal bareword | ||||
| 393 | 2 | 7µs | 2 | 127µs | return PPI::Token::Word->__TOKENIZER__commit($t); # spent 127µs making 2 calls to PPI::Token::Word::__TOKENIZER__commit, avg 63µs/call |
| 394 | |||||
| 395 | } elsif ( $char == 45 ) { # $char eq '-' | ||||
| 396 | # Look for an obvious operator operand context | ||||
| 397 | 1866 | 2.78ms | 1866 | 34.6ms | my $context = $t->_opcontext; # spent 34.6ms making 1866 calls to PPI::Tokenizer::_opcontext, avg 19µs/call |
| 398 | 1866 | 4.16ms | if ( $context eq 'operator' ) { | ||
| 399 | return 'Operator'; | ||||
| 400 | } else { | ||||
| 401 | # More logic needed | ||||
| 402 | 119 | 245µs | return 'Unknown'; | ||
| 403 | } | ||||
| 404 | |||||
| 405 | } elsif ( $char >= 128 ) { # Outside ASCII | ||||
| 406 | return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $t =~ /\w/; | ||||
| 407 | return 'Whitespace' if $t =~ /\s/; | ||||
| 408 | } | ||||
| 409 | |||||
| 410 | |||||
| 411 | # All the whitespaces are covered, so what to do | ||||
| 412 | ### For now, die | ||||
| 413 | PPI::Exception->throw("Encountered unexpected character '$char'"); | ||||
| 414 | } | ||||
| 415 | |||||
| 416 | # spent 94.4ms (67.0+27.4) within PPI::Token::Whitespace::__TOKENIZER__on_line_end which was called 9549 times, avg 10µs/call:
# 9549 times (67.0ms+27.4ms) by PPI::Tokenizer::_process_next_line at line 518 of PPI/Tokenizer.pm, avg 10µs/call | ||||
| 417 | 9549 | 73.6ms | 19098 | 27.4ms | $_[1]->_finalize_token if $_[1]->{token}; # spent 20.9ms making 9549 calls to PPI::Tokenizer::_finalize_token, avg 2µs/call
# spent 6.50ms making 9549 calls to PPI::Util::TRUE, avg 681ns/call |
| 418 | } | ||||
| 419 | |||||
| 420 | 1 | 2µs | 1; | ||
| 421 | |||||
| 422 | =pod | ||||
| 423 | |||||
| 424 | =head1 SUPPORT | ||||
| 425 | |||||
| 426 | See the L<support section|PPI/SUPPORT> in the main module. | ||||
| 427 | |||||
| 428 | =head1 AUTHOR | ||||
| 429 | |||||
| 430 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> | ||||
| 431 | |||||
| 432 | =head1 COPYRIGHT | ||||
| 433 | |||||
| 434 | Copyright 2001 - 2011 Adam Kennedy. | ||||
| 435 | |||||
| 436 | This program is free software; you can redistribute | ||||
| 437 | it and/or modify it under the same terms as Perl itself. | ||||
| 438 | |||||
| 439 | The full text of the license can be found in the | ||||
| 440 | LICENSE file included with this module. | ||||
| 441 | |||||
| 442 | =cut | ||||
# spent 28.0ms within PPI::Token::Whitespace::CORE:match which was called 45841 times, avg 611ns/call:
# 45838 times (28.0ms+0s) by PPI::Token::Whitespace::__TOKENIZER__on_line_start at line 157, avg 611ns/call
# 2 times (1µs+0s) by PPI::Token::Whitespace::__TOKENIZER__on_char at line 386, avg 500ns/call
# once (17µs+0s) by PPI::Token::Whitespace::__TOKENIZER__on_char at line 304 |