Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/CharClass/Simple.pm |
Statements | Executed 14 statements in 541µs |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 13µs | 25µs | BEGIN@33 | PPIx::Regexp::Token::CharClass::Simple::
1 | 1 | 1 | 8µs | 12µs | BEGIN@34 | PPIx::Regexp::Token::CharClass::Simple::
1 | 1 | 1 | 7µs | 36µs | BEGIN@38 | PPIx::Regexp::Token::CharClass::Simple::
1 | 1 | 1 | 7µs | 58µs | BEGIN@36 | PPIx::Regexp::Token::CharClass::Simple::
0 | 0 | 0 | 0s | 0s | __PPIX_TOKENIZER__regexp | PPIx::Regexp::Token::CharClass::Simple::
0 | 0 | 0 | 0s | 0s | _is_case_sensitive | PPIx::Regexp::Token::CharClass::Simple::
0 | 0 | 0 | 0s | 0s | perl_version_introduced | PPIx::Regexp::Token::CharClass::Simple::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | =head1 NAME | ||||
2 | |||||
3 | PPIx::Regexp::Token::CharClass::Simple - This class represents a simple character class | ||||
4 | |||||
5 | =head1 SYNOPSIS | ||||
6 | |||||
7 | use PPIx::Regexp::Dumper; | ||||
8 | PPIx::Regexp::Dumper->new( 'qr{\w}smx' ) | ||||
9 | ->print(); | ||||
10 | |||||
11 | =head1 INHERITANCE | ||||
12 | |||||
13 | C<PPIx::Regexp::Token::CharClass::Simple> is a | ||||
14 | L<PPIx::Regexp::Token::CharClass|PPIx::Regexp::Token::CharClass>. | ||||
15 | |||||
16 | C<PPIx::Regexp::Token::CharClass::Simple> has no descendants. | ||||
17 | |||||
18 | =head1 DESCRIPTION | ||||
19 | |||||
20 | This class represents one of the simple character classes that can occur | ||||
21 | anywhere in a regular expression. This includes not only the truly | ||||
22 | simple things like \w, but also Unicode properties. | ||||
23 | |||||
24 | =head1 METHODS | ||||
25 | |||||
26 | This class provides no public methods beyond those provided by its | ||||
27 | superclass. | ||||
28 | |||||
29 | =cut | ||||
30 | |||||
31 | package PPIx::Regexp::Token::CharClass::Simple; | ||||
32 | |||||
33 | 2 | 24µs | 2 | 37µs | # spent 25µs (13+12) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@33 which was called:
# once (13µs+12µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 33 # spent 25µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@33
# spent 12µs making 1 call to strict::import |
34 | 2 | 20µs | 2 | 17µs | # spent 12µs (8+4) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@34 which was called:
# once (8µs+4µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 34 # spent 12µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@34
# spent 4µs making 1 call to warnings::import |
35 | |||||
36 | 2 | 26µs | 2 | 109µs | # spent 58µs (7+51) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@36 which was called:
# once (7µs+51µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 36 # spent 58µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@36
# spent 51µs making 1 call to base::import |
37 | |||||
38 | 1 | 200ns | # spent 36µs (7+29) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@38 which was called:
# once (7µs+29µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 40 | ||
39 | COOKIE_CLASS MINIMUM_PERL TOKEN_LITERAL TOKEN_UNKNOWN | ||||
40 | 1 | 428µs | 2 | 64µs | }; # spent 36µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@38
# spent 29µs making 1 call to Exporter::import |
41 | |||||
42 | 1 | 600ns | our $VERSION = '0.036'; | ||
43 | |||||
44 | ##=head2 is_case_sensitive | ||||
45 | ## | ||||
46 | ##This override of the superclass method returns true for Unicode | ||||
47 | ##properties that specify case, and false (but defined) for all | ||||
48 | ##other character classes. | ||||
49 | ## | ||||
50 | ##The classes that specify case are documented in | ||||
51 | ##L<perluniprops|/perluniprops>. | ||||
52 | ## | ||||
53 | ##B<Known bug:> This method returns false (but defined) for user-defined | ||||
54 | ##Unicode properties. It should return C<undef>. This bug B<may> be fixed | ||||
55 | ##if I find a way to identify all system-defined Unicode properties. | ||||
56 | ## | ||||
57 | ##=cut | ||||
58 | ## | ||||
59 | ##sub is_case_sensitive { | ||||
60 | ## my ( $self ) = @_; | ||||
61 | ## exists $self->{is_case_sensitive} | ||||
62 | ## and return $self->{is_case_sensitive}; | ||||
63 | ## return ( $self->{is_case_sensitive} = $self->_is_case_sensitive() ); | ||||
64 | ##} | ||||
65 | |||||
{
    # Normalized names of the Unicode properties that select characters
    # by case. Keys are in the canonical form built by
    # _is_case_sensitive(): lower-cased, with white space, underscores,
    # hyphens and a leading "is" removed, ':' rewritten as '=', and
    # boolean values reduced to 'y' or 'n'.
    my %case_sensitive = map { $_ => 1 } qw{
        generalcategory=lowercaseletter generalcategory=ll
        gc=lowercaseletter gc=ll
        generalcategory=titlecaseletter generalcategory=lt
        gc=titlecaseletter gc=lt
        generalcategory=uppercaseletter generalcategory=lu
        gc=uppercaseletter gc=lu
        lowercaseletter lowercase lower ll
        titlecaseletter titlecase title lt
        uppercaseletter uppercase upper lu
        lowercase=y lower=y lowercase=n lower=n
        titlecase=y title=y titlecase=n title=n
        uppercase=y upper=y uppercase=n upper=n
    };

    # Report whether this token is a Unicode property that specifies
    # case.
    #
    # Takes only the invocant, whose text is read through content().
    # Returns 1 when the text is a \p{...} or \P{...} construction whose
    # normalized property name is a key of %case_sensitive, and 0 for
    # everything else (including text that is not a property at all).
    sub _is_case_sensitive {
        my ( $self ) = @_;
        my $content = $self->content();

        # The /i modifier lets the same pattern accept both \p and \P.
        $content =~ m/ \A \\ p [{] ( .* ) [}] /smxi
            or return 0;
        $content = lc $1;

        # Drop the negation marker. The caret must be escaped: a bare ^
        # is an anchor, which turned this substitution into a no-op and
        # left negated properties such as \p{^Lower} unrecognized. Any
        # white space in front of the caret would be removed by the next
        # substitution anyway, so it is consumed here as well.
        $content =~ s/ \A \s* \^ //smx;

        # Property names are matched loosely: white space, underscores
        # and hyphens are ignored, as is a leading "is".
        $content =~ s/ [\s_-] //smxg;
        $content =~ s/ \A is //smx;

        # Fold the name:value form and the spellings of the boolean
        # values into the single form used by the lookup table.
        $content =~ s/ : /=/smxg;
        $content =~ s/ = (?: yes | t | true ) \b /=y/smxg;
        $content =~ s/ = (?: no | f | false ) \b /=n/smxg;

        return $case_sensitive{$content} || 0;
    }

}
98 | |||||
{

    # Character classes that have not always been available, mapped to
    # the Perl version that first recognized them.
    my %introduced = (
        '\h' => '5.009005',    # Before this, parsed as 'h'
        '\v' => '5.009005',    # Before this, parsed as 'v'
        '\H' => '5.009005',    # Before this, parsed as 'H'
        '\N' => '5.011',       # Before this, an error.
        '\V' => '5.009005',    # Before this, parsed as 'V'
        '\R' => '5.009005',
        '\C' => '5.006',
        '\X' => '5.006',
    );

    # Return the earliest Perl version that understands this token.
    #
    # The token text, read through content(), is looked up in
    # %introduced first. Failing that, anything starting with \p or \P
    # is reported as '5.006001'. Every other token gets MINIMUM_PERL.
    sub perl_version_introduced {
        my ( $self ) = @_;
        my $text = $self->content();

        my $listed = $introduced{$text};
        defined $listed
            and return $listed;

        # I must have read perl5113delta and thought this
        # represented the change they were talking about, but I sure
        # don't see it now. So, until things become clearer ...
        #   $content =~ m/ \G .*? [\s=-] /smxgc
        #       and return '5.011003';
        $text =~ m/ \A \\ [Pp] /smxg
            and return '5.006001';

        return MINIMUM_PERL;
    }

}
130 | |||||
# Tokenizer hook: try to recognize a simple character class at the
# current position.
#
# Arguments are the class name, the tokenizer object and the current
# character. The return value is whatever make_token() returns, the
# number handed back by find_regexp(), the literal 1 for a '.', or
# nothing at all when this class does not want the text.
sub __PPIX_TOKENIZER__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    # The cookie is consulted before anything else, exactly once.
    my $bracketed = $tokenizer->cookie( COOKIE_CLASS );

    # With the class cookie set a dot is turned into a literal token;
    # without it, a single character is accepted.
    if ( $character eq '.' ) {
        return $bracketed
            ? $tokenizer->make_token( 1, TOKEN_LITERAL )
            : 1;
    }

    my $length = $tokenizer->find_regexp(
        qr{ \A \\ (?:
            [wWsSdDvVhHXRNC] |
            [Pp] \{ \s* \^? [\w:=\s-]+ \}
        ) }smx
    ) or return;

    # With no class cookie set, every match is accepted as it stands.
    $bracketed
        or return $length;

    my $found = $tokenizer->match();

    # As of Perl 5.11.5, [\N] is a fatal error.
    if ( $found eq '\\N' ) {
        return $tokenizer->make_token(
            $length, TOKEN_UNKNOWN, {
                error => '\\N invalid inside character class',
            },
        );
    }

    # \R is not recognized inside a character class. It
    # eventually ends up as a literal.
    $found eq '\\R'
        and return;

    return $length;
}
166 | |||||
167 | 1 | 9µs | 1; | ||
168 | |||||
169 | __END__ |