Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPI/Token/BOM.pm |
Statements | Executed 730 statements in 2.59ms |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
144 | 1 | 1 | 2.16ms | 4.66ms | __TOKENIZER__on_line_start | PPI::Token::BOM::
144 | 1 | 1 | 260µs | 260µs | CORE:match (opcode) | PPI::Token::BOM::
1 | 1 | 1 | 14µs | 26µs | BEGIN@42 | PPI::Token::BOM::
1 | 1 | 1 | 8µs | 8µs | BEGIN@46 | PPI::Token::BOM::
1 | 1 | 1 | 6µs | 33µs | BEGIN@45 | PPI::Token::BOM::
1 | 1 | 1 | 3µs | 3µs | BEGIN@43 | PPI::Token::BOM::
0 | 0 | 0 | 0s | 0s | significant | PPI::Token::BOM::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package PPI::Token::BOM; | ||||
2 | |||||
3 | =pod | ||||
4 | |||||
5 | =head1 NAME | ||||
6 | |||||
7 | PPI::Token::BOM - Tokens representing Unicode byte order marks | ||||
8 | |||||
9 | =head1 INHERITANCE | ||||
10 | |||||
11 | PPI::Token::BOM | ||||
12 | isa PPI::Token | ||||
13 | isa PPI::Element | ||||
14 | |||||
15 | =head1 DESCRIPTION | ||||
16 | |||||
17 | This is a special token in that it can only occur at the beginning of | ||||
18 | documents. If a BOM byte mark occurs elsewhere in a file, it should | ||||
19 | be treated as L<PPI::Token::Whitespace>. We recognize the byte order | ||||
20 | marks identified at this URL: | ||||
21 | L<http://www.unicode.org/faq/utf_bom.html#BOM> | ||||
22 | |||||
23 | UTF-32, big-endian 00 00 FE FF | ||||
24 | UTF-32, little-endian FF FE 00 00 | ||||
25 | UTF-16, big-endian FE FF | ||||
26 | UTF-16, little-endian FF FE | ||||
27 | UTF-8 EF BB BF | ||||
28 | |||||
29 | Note that as of this writing, PPI only has support for UTF-8 | ||||
30 | (namely, in POD and strings) and no support for UTF-16 or UTF-32. We | ||||
31 | recognize the BOMs of the latter two only so they can be reported as unsupported. | ||||
32 | |||||
33 | The BOM is considered non-significant, like white space. | ||||
34 | |||||
35 | =head1 METHODS | ||||
36 | |||||
37 | There are no additional methods beyond those provided by the parent | ||||
38 | L<PPI::Token> and L<PPI::Element> classes. | ||||
39 | |||||
40 | =cut | ||||
41 | |||||
42 | 2 | 18µs | 2 | 38µs | # spent 26µs (14+12) within PPI::Token::BOM::BEGIN@42 which was called:
# once (14µs+12µs) by PPI::Token::BEGIN@38 at line 42 # spent 26µs making 1 call to PPI::Token::BOM::BEGIN@42
# spent 12µs making 1 call to strict::import |
43 | 2 | 18µs | 1 | 3µs | # spent 3µs within PPI::Token::BOM::BEGIN@43 which was called:
# once (3µs+0s) by PPI::Token::BEGIN@38 at line 43 # spent 3µs making 1 call to PPI::Token::BOM::BEGIN@43 |
44 | |||||
45 | 2 | 27µs | 2 | 60µs | # spent 33µs (6+27) within PPI::Token::BOM::BEGIN@45 which was called:
# once (6µs+27µs) by PPI::Token::BEGIN@38 at line 45 # spent 33µs making 1 call to PPI::Token::BOM::BEGIN@45
# spent 27µs making 1 call to vars::import |
46 | # spent 8µs within PPI::Token::BOM::BEGIN@46 which was called:
# once (8µs+0s) by PPI::Token::BEGIN@38 at line 49 | ||||
47 | 1 | 400ns | $VERSION = '1.215'; | ||
48 | 1 | 8µs | @ISA = 'PPI::Token'; | ||
49 | 1 | 181µs | 1 | 8µs | } # spent 8µs making 1 call to PPI::Token::BOM::BEGIN@46 |
50 | |||||
51 | sub significant { '' } | ||||
52 | |||||
- - | |||||
57 | ##################################################################### | ||||
58 | # Parsing Methods | ||||
59 | |||||
60 | 1 | 3µs | my %bom_types = ( | ||
61 | "\x00\x00\xfe\xff" => 'UTF-32', | ||||
62 | "\xff\xfe\x00\x00" => 'UTF-32', | ||||
63 | "\xfe\xff" => 'UTF-16', | ||||
64 | "\xff\xfe" => 'UTF-16', | ||||
65 | "\xef\xbb\xbf" => 'UTF-8', | ||||
66 | ); | ||||
67 | |||||
68 | # spent 4.66ms (2.16+2.50) within PPI::Token::BOM::__TOKENIZER__on_line_start which was called 144 times, avg 32µs/call:
# 144 times (2.16ms+2.50ms) by PPI::Tokenizer::_process_next_line at line 499 of PPI/Tokenizer.pm, avg 32µs/call | ||||
69 | 144 | 56µs | my $t = $_[1]; | ||
70 | 144 | 110µs | $_ = $t->{line}; | ||
71 | |||||
72 | 144 | 715µs | 144 | 260µs | if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian # spent 260µs making 144 calls to PPI::Token::BOM::CORE:match, avg 2µs/call |
73 | \xff\xfe\x00\x00 | # UTF-32, little-endian | ||||
74 | \xfe\xff | # UTF-16, big-endian | ||||
75 | \xff\xfe | # UTF-16, little-endian | ||||
76 | \xef\xbb\xbf) # UTF-8 | ||||
77 | /xs) { | ||||
78 | my $bom = $1; | ||||
79 | |||||
80 | if ($bom_types{$bom} ne 'UTF-8') { | ||||
81 | return $t->_error("$bom_types{$bom} is not supported"); | ||||
82 | } | ||||
83 | |||||
84 | $t->_new_token('BOM', $bom) or return undef; | ||||
85 | $t->{line_cursor} += length $bom; | ||||
86 | } | ||||
87 | |||||
88 | # Continue just as if there was no BOM | ||||
89 | 144 | 339µs | $t->{class} = 'PPI::Token::Whitespace'; | ||
90 | 144 | 1.11ms | 144 | 2.24ms | return $t->{class}->__TOKENIZER__on_line_start($t); # spent 2.24ms making 144 calls to PPI::Token::Whitespace::__TOKENIZER__on_line_start, avg 16µs/call |
91 | } | ||||
92 | |||||
93 | 1 | 4µs | 1; | ||
94 | |||||
95 | =pod | ||||
96 | |||||
97 | =head1 SUPPORT | ||||
98 | |||||
99 | See the L<support section|PPI/SUPPORT> in the main module | ||||
100 | |||||
101 | =head1 AUTHOR | ||||
102 | |||||
103 | Chris Dolan E<lt>cdolan@cpan.orgE<gt> | ||||
104 | |||||
105 | =head1 COPYRIGHT | ||||
106 | |||||
107 | Copyright 2001 - 2011 Adam Kennedy. | ||||
108 | |||||
109 | This program is free software; you can redistribute | ||||
110 | it and/or modify it under the same terms as Perl itself. | ||||
111 | |||||
112 | The full text of the license can be found in the | ||||
113 | LICENSE file included with this module. | ||||
114 | |||||
115 | =cut | ||||
# spent 260µs within PPI::Token::BOM::CORE:match which was called 144 times, avg 2µs/call:
# 144 times (260µs+0s) by PPI::Token::BOM::__TOKENIZER__on_line_start at line 72, avg 2µs/call |