| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPI/Token/BOM.pm |
| Statements | Executed 730 statements in 2.59ms |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 144 | 1 | 1 | 2.16ms | 4.66ms | PPI::Token::BOM::__TOKENIZER__on_line_start |
| 144 | 1 | 1 | 260µs | 260µs | PPI::Token::BOM::CORE:match (opcode) |
| 1 | 1 | 1 | 14µs | 26µs | PPI::Token::BOM::BEGIN@42 |
| 1 | 1 | 1 | 8µs | 8µs | PPI::Token::BOM::BEGIN@46 |
| 1 | 1 | 1 | 6µs | 33µs | PPI::Token::BOM::BEGIN@45 |
| 1 | 1 | 1 | 3µs | 3µs | PPI::Token::BOM::BEGIN@43 |
| 0 | 0 | 0 | 0s | 0s | PPI::Token::BOM::significant |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package PPI::Token::BOM; | ||||
| 2 | |||||
| 3 | =pod | ||||
| 4 | |||||
| 5 | =head1 NAME | ||||
| 6 | |||||
| 7 | PPI::Token::BOM - Tokens representing Unicode byte order marks | ||||
| 8 | |||||
| 9 | =head1 INHERITANCE | ||||
| 10 | |||||
| 11 | PPI::Token::BOM | ||||
| 12 | isa PPI::Token | ||||
| 13 | isa PPI::Element | ||||
| 14 | |||||
| 15 | =head1 DESCRIPTION | ||||
| 16 | |||||
| 17 | This is a special token in that it can only occur at the beginning of | ||||
| 18 | documents. If a byte order mark occurs elsewhere in a file, it should | ||||
| 19 | be treated as L<PPI::Token::Whitespace>. We recognize the byte order | ||||
| 20 | marks identified at this URL: | ||||
| 21 | L<http://www.unicode.org/faq/utf_bom.html#BOM> | ||||
| 22 | |||||
| 23 | UTF-32, big-endian 00 00 FE FF | ||||
| 24 | UTF-32, little-endian FF FE 00 00 | ||||
| 25 | UTF-16, big-endian FE FF | ||||
| 26 | UTF-16, little-endian FF FE | ||||
| 27 | UTF-8 EF BB BF | ||||
| 28 | |||||
| 29 | Note that as of this writing, PPI only has support for UTF-8 | ||||
| 30 | (namely, in POD and strings) and no support for UTF-16 or UTF-32. The | ||||
| 31 | BOMs of the latter two are recognized only to report them as unsupported. | ||||
| 32 | |||||
| 33 | The BOM is considered non-significant, like white space. | ||||
| 34 | |||||
| 35 | =head1 METHODS | ||||
| 36 | |||||
| 37 | There are no additional methods beyond those provided by the parent | ||||
| 38 | L<PPI::Token> and L<PPI::Element> classes. | ||||
| 39 | |||||
| 40 | =cut | ||||
| 41 | |||||
| 42 | 2 | 18µs | 2 | 38µs | # spent 26µs (14+12) within PPI::Token::BOM::BEGIN@42 which was called:
# once (14µs+12µs) by PPI::Token::BEGIN@38 at line 42 # spent 26µs making 1 call to PPI::Token::BOM::BEGIN@42
# spent 12µs making 1 call to strict::import |
| 43 | 2 | 18µs | 1 | 3µs | # spent 3µs within PPI::Token::BOM::BEGIN@43 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@38 at line 43 # spent 3µs making 1 call to PPI::Token::BOM::BEGIN@43 |
| 44 | |||||
| 45 | 2 | 27µs | 2 | 60µs | # spent 33µs (6+27) within PPI::Token::BOM::BEGIN@45 which was called:
# once (6µs+27µs) by PPI::Token::BEGIN@38 at line 45 # spent 33µs making 1 call to PPI::Token::BOM::BEGIN@45
# spent 27µs making 1 call to vars::import |
| 46 | # spent 8µs within PPI::Token::BOM::BEGIN@46 which was called:
# once (8µs+0s) by PPI::Token::BEGIN@38 at line 49 | ||||
| 47 | 1 | 400ns | $VERSION = '1.215'; | ||
| 48 | 1 | 8µs | @ISA = 'PPI::Token'; | ||
| 49 | 1 | 181µs | 1 | 8µs | } # spent 8µs making 1 call to PPI::Token::BOM::BEGIN@46 |
| 50 | |||||
| 51 | sub significant { '' } | ||||
| 52 | |||||
| - - | |||||
| 57 | ##################################################################### | ||||
| 58 | # Parsing Methods | ||||
| 59 | |||||
| 60 | 1 | 3µs | my %bom_types = ( | ||
| 61 | "\x00\x00\xfe\xff" => 'UTF-32', | ||||
| 62 | "\xff\xfe\x00\x00" => 'UTF-32', | ||||
| 63 | "\xfe\xff" => 'UTF-16', | ||||
| 64 | "\xff\xfe" => 'UTF-16', | ||||
| 65 | "\xef\xbb\xbf" => 'UTF-8', | ||||
| 66 | ); | ||||
| 67 | |||||
| 68 | # spent 4.66ms (2.16+2.50) within PPI::Token::BOM::__TOKENIZER__on_line_start which was called 144 times, avg 32µs/call:
# 144 times (2.16ms+2.50ms) by PPI::Tokenizer::_process_next_line at line 499 of PPI/Tokenizer.pm, avg 32µs/call | ||||
| 69 | 144 | 56µs | my $t = $_[1]; | ||
| 70 | 144 | 110µs | $_ = $t->{line}; | ||
| 71 | |||||
| 72 | 144 | 715µs | 144 | 260µs | if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian # spent 260µs making 144 calls to PPI::Token::BOM::CORE:match, avg 2µs/call |
| 73 | \xff\xfe\x00\x00 | # UTF-32, little-endian | ||||
| 74 | \xfe\xff | # UTF-16, big-endian | ||||
| 75 | \xff\xfe | # UTF-16, little-endian | ||||
| 76 | \xef\xbb\xbf) # UTF-8 | ||||
| 77 | /xs) { | ||||
| 78 | my $bom = $1; | ||||
| 79 | |||||
| 80 | if ($bom_types{$bom} ne 'UTF-8') { | ||||
| 81 | return $t->_error("$bom_types{$bom} is not supported"); | ||||
| 82 | } | ||||
| 83 | |||||
| 84 | $t->_new_token('BOM', $bom) or return undef; | ||||
| 85 | $t->{line_cursor} += length $bom; | ||||
| 86 | } | ||||
| 87 | |||||
| 88 | # Continue just as if there was no BOM | ||||
| 89 | 144 | 339µs | $t->{class} = 'PPI::Token::Whitespace'; | ||
| 90 | 144 | 1.11ms | 144 | 2.24ms | return $t->{class}->__TOKENIZER__on_line_start($t); # spent 2.24ms making 144 calls to PPI::Token::Whitespace::__TOKENIZER__on_line_start, avg 16µs/call |
| 91 | } | ||||
| 92 | |||||
| 93 | 1 | 4µs | 1; | ||
| 94 | |||||
| 95 | =pod | ||||
| 96 | |||||
| 97 | =head1 SUPPORT | ||||
| 98 | |||||
| 99 | See the L<support section|PPI/SUPPORT> in the main module. | ||||
| 100 | |||||
| 101 | =head1 AUTHOR | ||||
| 102 | |||||
| 103 | Chris Dolan E<lt>cdolan@cpan.orgE<gt> | ||||
| 104 | |||||
| 105 | =head1 COPYRIGHT | ||||
| 106 | |||||
| 107 | Copyright 2001 - 2011 Adam Kennedy. | ||||
| 108 | |||||
| 109 | This program is free software; you can redistribute | ||||
| 110 | it and/or modify it under the same terms as Perl itself. | ||||
| 111 | |||||
| 112 | The full text of the license can be found in the | ||||
| 113 | LICENSE file included with this module. | ||||
| 114 | |||||
| 115 | =cut | ||||
# spent 260µs within PPI::Token::BOM::CORE:match which was called 144 times, avg 2µs/call:
# 144 times (260µs+0s) by PPI::Token::BOM::__TOKENIZER__on_line_start at line 72, avg 2µs/call |