Profile of PPIx/Regexp/Token/CharClass/Simple.pm

Filename	/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/CharClass/Simple.pm
Statements	Executed 14 statements in 541µs

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	13µs	25µs	PPIx::Regexp::Token::CharClass::Simple::BEGIN@33
1	1	1	8µs	12µs	PPIx::Regexp::Token::CharClass::Simple::BEGIN@34
1	1	1	7µs	36µs	PPIx::Regexp::Token::CharClass::Simple::BEGIN@38
1	1	1	7µs	58µs	PPIx::Regexp::Token::CharClass::Simple::BEGIN@36
0	0	0	0s	0s	PPIx::Regexp::Token::CharClass::Simple::__PPIX_TOKENIZER__regexp
0	0	0	0s	0s	PPIx::Regexp::Token::CharClass::Simple::_is_case_sensitive
0	0	0	0s	0s	PPIx::Regexp::Token::CharClass::Simple::perl_version_introduced

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					=head1 NAME
2
3					PPIx::Regexp::Token::CharClass::Simple - This class represents a simple character class
4
5					=head1 SYNOPSIS
6
7					use PPIx::Regexp::Dumper;
8					PPIx::Regexp::Dumper->new( 'qr{\w}smx' )
9					->print();
10
11					=head1 INHERITANCE
12
13					C<PPIx::Regexp::Token::CharClass::Simple> is a
14					L<PPIx::Regexp::Token::CharClass|PPIx::Regexp::Token::CharClass>.
15
16					C<PPIx::Regexp::Token::CharClass::Simple> has no descendants.
17
18					=head1 DESCRIPTION
19
20					This class represents one of the simple character classes that can occur
21					anywhere in a regular expression. This includes not only the truly
22					simple things like \w, but also Unicode properties.
23
24					=head1 METHODS
25
26					This class provides no public methods beyond those provided by its
27					superclass.
28
29					=cut
30
31					package PPIx::Regexp::Token::CharClass::Simple;
32
33	2	24µs	2	37µs	# spent 25µs (13+12) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@33 which was called: # once (13µs+12µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 33 use strict; # spent 25µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@33 # spent 12µs making 1 call to strict::import
34	2	20µs	2	17µs	# spent 12µs (8+4) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@34 which was called: # once (8µs+4µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 34 use warnings; # spent 12µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@34 # spent 4µs making 1 call to warnings::import
35
36	2	26µs	2	109µs	# spent 58µs (7+51) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@36 which was called: # once (7µs+51µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 36 use base qw{ PPIx::Regexp::Token::CharClass }; # spent 58µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@36 # spent 51µs making 1 call to base::import
37
38	1	200ns			# spent 36µs (7+29) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@38 which was called: # once (7µs+29µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 40 use PPIx::Regexp::Constant qw{
39					COOKIE_CLASS MINIMUM_PERL TOKEN_LITERAL TOKEN_UNKNOWN
40	1	428µs	2	64µs	}; # spent 36µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@38 # spent 29µs making 1 call to Exporter::import
41
42	1	600ns			our $VERSION = '0.036';
43
44					##=head2 is_case_sensitive
45					##
46					##This override of the superclass method returns true for Unicode
47					##properties that specify case, and false (but defined) for all
48					##other character classes.
49					##
50					##The classes that specify case are documented in
51					##L<perluniprops|/perluniprops>.
52					##
53					##B<Known bug:> This method returns false (but defined) for user-defined
54					##Unicode properties. It should return C<undef>. This bug B<may> be fixed
55					##if I find a way to identify all system-defined Unicode properties.
56					##
57					##=cut
58					##
59					##sub is_case_sensitive {
60					## my ( $self ) = @_;
61					## exists $self->{is_case_sensitive}
62					## and return $self->{is_case_sensitive};
63					## return ( $self->{is_case_sensitive} = $self->_is_case_sensitive() );
64					##}
65
{
    # Unicode property specifications that distinguish letter case. The
    # keys are in the normalized form that _is_case_sensitive() produces:
    # lower case, no white space / underscores / hyphens, no leading '^'
    # or 'is', '=' as the name/value separator, and boolean values
    # reduced to 'y' or 'n'.
    # (NYTProf: 2 statements, 30µs spent building this table.)
    my %case_sensitive = map { $_ => 1 } qw{
        generalcategory=lowercaseletter generalcategory=ll
        gc=lowercaseletter gc=ll
        generalcategory=titlecaseletter generalcategory=lt
        gc=titlecaseletter gc=lt
        generalcategory=uppercaseletter generalcategory=lu
        gc=uppercaseletter gc=lu
        lowercaseletter lowercase lower ll
        titlecaseletter titlecase title lt
        uppercaseletter uppercase upper lu
        lowercase=y lower=y lowercase=n lower=n
        titlecase=y title=y titlecase=n title=n
        uppercase=y upper=y uppercase=n upper=n
    };

    # $bool = $self->_is_case_sensitive();
    #
    # Returns 1 if the token's content is a \p{...} or \P{...} property
    # whose normalized name appears in %case_sensitive, and 0 otherwise
    # (including when the content is not a braced property at all).
    #
    # $self must provide a content() method returning the token text.
    sub _is_case_sensitive {
        my ( $self ) = @_;
        my $content = $self->content();

        # Only braced properties are considered; /i lets the same
        # pattern accept both \p{...} and \P{...}.
        $content =~ m/ \A \\ p [{] ( .* ) [}] /smxi
            or return 0;
        $content = lc $1;

        # Drop white space, underscores and hyphens first, so that a
        # negation caret preceded by white space ends up at the start.
        $content =~ s/ [\s_-] //smxg;

        # Strip the negation caret. It must be escaped: under /x a bare
        # '^' is an anchor and would match the empty string, leaving
        # the caret in place.
        $content =~ s/ \A \^ //smx;

        # Strip the optional 'is' prefix, then normalize the separator
        # and the spelling of boolean values.
        $content =~ s/ \A is //smx;
        $content =~ s/ : /=/smxg;
        $content =~ s/ = (?: yes | t | true ) \b /=y/smxg;
        $content =~ s/ = (?: no | f | false ) \b /=n/smxg;

        return $case_sensitive{$content} || 0;
    }

}
98
{

    # Backslash classes that need a specific Perl release, keyed by the
    # exact token content, with the release that introduced each one.
    # (NYTProf: 2 statements, 4µs spent building this table.)
    my %version_for = (
        '\C' => '5.006',
        '\X' => '5.006',
        '\R' => '5.009005',
        '\h' => '5.009005',    # Before this, parsed as 'h'
        '\H' => '5.009005',    # Before this, parsed as 'H'
        '\v' => '5.009005',    # Before this, parsed as 'v'
        '\V' => '5.009005',    # Before this, parsed as 'V'
        '\N' => '5.011',       # Before this, an error.
    );

    # $version = $self->perl_version_introduced();
    #
    # Returns the Perl version string that introduced this token:
    #   - the table entry, when the content is one of the keys above;
    #   - '5.006001' for any \p or \P property;
    #   - MINIMUM_PERL for everything else.
    #
    # $self must provide a content() method returning the token text.
    sub perl_version_introduced {
        my ( $self ) = @_;
        my $text = $self->content();

        my $known = $version_for{$text};
        return $known if defined $known;

        # Historical note from the original author: a follow-up match
        #     $content =~ m/ \G .*? [\s=-] /smxgc and return '5.011003';
        # was once applied here on the strength of a reading of
        # perl5113delta, but was disabled because the author could no
        # longer find the change it was meant to reflect. The /g on the
        # match below is what that \G continuation relied on.
        return '5.006001'
            if $text =~ m/ \A \\ [Pp] /smxg;

        return MINIMUM_PERL;
    }

}
130
# $result = $class->__PPIX_TOKENIZER__regexp( $tokenizer, $character );
#
# Tokenizer hook that recognizes a simple character class at the point
# the tokenizer is currently examining.
#
# Arguments:
#   $class     - the invocant (not otherwise used here).
#   $tokenizer - object providing cookie(), find_regexp(), match() and
#                make_token().
#   $character - the character under examination; only compared to '.'.
#
# Returns:
#   - for '.': a TOKEN_LITERAL token of length 1 when the COOKIE_CLASS
#     cookie is set (i.e. inside a bracketed class), otherwise 1;
#   - for \w \W \s \S \d \D \v \V \h \H \X \R \N \C or a braced
#     \p{...} / \P{...}: whatever find_regexp() returned (presumably
#     the number of characters to accept -- confirm against the
#     tokenizer), except that inside a bracketed class \N yields a
#     TOKEN_UNKNOWN token carrying an error message and \R yields
#     nothing;
#   - nothing (bare return) when none of the above applies.
sub __PPIX_TOKENIZER__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    my $in_class = $tokenizer->cookie( COOKIE_CLASS );

    if ( $character eq '.' ) {
        # Inside a bracketed class a dot is just a literal dot.
        $in_class
            and return $tokenizer->make_token( 1, TOKEN_LITERAL );
        return 1;
    }

    # The bar between the two branches must be a real alternation; an
    # escaped bar would demand a literal '|' under /x and the pattern
    # could then never match a lone \w, \d, and so on.
    if ( my $accept = $tokenizer->find_regexp(
            qr{ \A \\ (?:
                [wWsSdDvVhHXRNC] |
                [Pp] \{ \s* \^? [\w:=\s-]+ \}
            ) }smx
        ) ) {
        if ( $in_class ) {
            my $match = $tokenizer->match();
            # As of Perl 5.11.5, [\N] is a fatal error.
            '\\N' eq $match
                and return $tokenizer->make_token(
                    $accept, TOKEN_UNKNOWN, {
                        error => '\\N invalid inside character class',
                    },
                );
            # \R is not recognized inside a character class. It
            # eventually ends up as a literal.
            '\\R' eq $match and return;
        }
        return $accept;
    }

    return;
}
166
167	1	9µs			1;
168
169					__END__