| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/PPIx/Regexp/Token/CharClass/Simple.pm |
| Statements | Executed 14 statements in 541µs |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 13µs | 25µs | PPIx::Regexp::Token::CharClass::Simple::BEGIN@33 |
| 1 | 1 | 1 | 8µs | 12µs | PPIx::Regexp::Token::CharClass::Simple::BEGIN@34 |
| 1 | 1 | 1 | 7µs | 36µs | PPIx::Regexp::Token::CharClass::Simple::BEGIN@38 |
| 1 | 1 | 1 | 7µs | 58µs | PPIx::Regexp::Token::CharClass::Simple::BEGIN@36 |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::CharClass::Simple::__PPIX_TOKENIZER__regexp |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::CharClass::Simple::_is_case_sensitive |
| 0 | 0 | 0 | 0s | 0s | PPIx::Regexp::Token::CharClass::Simple::perl_version_introduced |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | =head1 NAME | ||||
| 2 | |||||
| 3 | PPIx::Regexp::Token::CharClass::Simple - This class represents a simple character class | ||||
| 4 | |||||
| 5 | =head1 SYNOPSIS | ||||
| 6 | |||||
| 7 | use PPIx::Regexp::Dumper; | ||||
| 8 | PPIx::Regexp::Dumper->new( 'qr{\w}smx' ) | ||||
| 9 | ->print(); | ||||
| 10 | |||||
| 11 | =head1 INHERITANCE | ||||
| 12 | |||||
| 13 | C<PPIx::Regexp::Token::CharClass::Simple> is a | ||||
| 14 | L<PPIx::Regexp::Token::CharClass|PPIx::Regexp::Token::CharClass>. | ||||
| 15 | |||||
| 16 | C<PPIx::Regexp::Token::CharClass::Simple> has no descendants. | ||||
| 17 | |||||
| 18 | =head1 DESCRIPTION | ||||
| 19 | |||||
| 20 | This class represents one of the simple character classes that can occur | ||||
| 21 | anywhere in a regular expression. This includes not only the truly | ||||
| 22 | simple things like \w, but also Unicode properties. | ||||
| 23 | |||||
| 24 | =head1 METHODS | ||||
| 25 | |||||
| 26 | This class provides no public methods beyond those provided by its | ||||
| 27 | superclass. | ||||
| 28 | |||||
| 29 | =cut | ||||
| 30 | |||||
| 31 | package PPIx::Regexp::Token::CharClass::Simple; | ||||
| 32 | |||||
| 33 | 2 | 24µs | 2 | 37µs | # spent 25µs (13+12) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@33 which was called:
# once (13µs+12µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 33 # spent 25µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@33
# spent 12µs making 1 call to strict::import |
| 34 | 2 | 20µs | 2 | 17µs | # spent 12µs (8+4) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@34 which was called:
# once (8µs+4µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 34 # spent 12µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@34
# spent 4µs making 1 call to warnings::import |
| 35 | |||||
| 36 | 2 | 26µs | 2 | 109µs | # spent 58µs (7+51) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@36 which was called:
# once (7µs+51µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 36 # spent 58µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@36
# spent 51µs making 1 call to base::import |
| 37 | |||||
| 38 | 1 | 200ns | # spent 36µs (7+29) within PPIx::Regexp::Token::CharClass::Simple::BEGIN@38 which was called:
# once (7µs+29µs) by PPIx::Regexp::Tokenizer::BEGIN@19 at line 40 | ||
| 39 | COOKIE_CLASS MINIMUM_PERL TOKEN_LITERAL TOKEN_UNKNOWN | ||||
| 40 | 1 | 428µs | 2 | 64µs | }; # spent 36µs making 1 call to PPIx::Regexp::Token::CharClass::Simple::BEGIN@38
# spent 29µs making 1 call to Exporter::import |
| 41 | |||||
| 42 | 1 | 600ns | our $VERSION = '0.036'; | ||
| 43 | |||||
| 44 | ##=head2 is_case_sensitive | ||||
| 45 | ## | ||||
| 46 | ##This override of the superclass method returns true for Unicode | ||||
| 47 | ##properties that specify case, and false (but defined) for all | ||||
| 48 | ##other character classes. | ||||
| 49 | ## | ||||
| 50 | ##The classes that specify case are documented in | ||||
| 51 | ##L<perluniprops|/perluniprops>. | ||||
| 52 | ## | ||||
| 53 | ##B<Known bug:> This method returns false (but defined) for user-defined | ||||
| 54 | ##Unicode properties. It should return C<undef>. This bug B<may> be fixed | ||||
| 55 | ##if I find a way to identify all system-defined Unicode properties. | ||||
| 56 | ## | ||||
| 57 | ##=cut | ||||
| 58 | ## | ||||
| 59 | ##sub is_case_sensitive { | ||||
| 60 | ## my ( $self ) = @_; | ||||
| 61 | ## exists $self->{is_case_sensitive} | ||||
| 62 | ## and return $self->{is_case_sensitive}; | ||||
| 63 | ## return ( $self->{is_case_sensitive} = $self->_is_case_sensitive() ); | ||||
| 64 | ##} | ||||
| 65 | |||||
| 66 | { | ||||
| 67 | 2 | 30µs | my %case_sensitive = map { $_ => 1 } qw{ | ||
| 68 | generalcategory=lowercaseletter generalcategory=ll | ||||
| 69 | gc=lowercaseletter gc=ll | ||||
| 70 | generalcategory=titlecaseletter generalcategory=lt | ||||
| 71 | gc=titlecaseletter gc=lt | ||||
| 72 | generalcategory=uppercaseletter generalcategory=lu | ||||
| 73 | gc=uppercaseletter gc=lu | ||||
| 74 | lowercaseletter lowercase lower ll | ||||
| 75 | titlecaseletter titlecase title lt | ||||
| 76 | uppercaseletter uppercase upper lu | ||||
| 77 | lowercase=y lower=y lowercase=n lower=n | ||||
| 78 | titlecase=y title=y titlecase=n title=n | ||||
| 79 | uppercase=y upper=y uppercase=n upper=n | ||||
| 80 | }; | ||||
| 81 | |||||
| 82 | sub _is_case_sensitive { | ||||
| 83 | my ( $self ) = @_; | ||||
| 84 | my $content = $self->content(); | ||||
| 85 | $content =~ m/ \A \\ p [{] ( .* ) [}] /smxi | ||||
| 86 | or return 0; | ||||
| 87 | $content = lc $1; | ||||
| 88 | $content =~ s/ \A ^ //smx; | ||||
| 89 | $content =~ s/ [\s_-] //smxg; | ||||
| 90 | $content =~ s/ \A is //smx; | ||||
| 91 | $content =~ s/ : /=/smxg; | ||||
| 92 | $content =~ s/ = (?: yes | t | true ) \b /=y/smxg; | ||||
| 93 | $content =~ s/ = (?: no | f | false ) \b /=n/smxg; | ||||
| 94 | return $case_sensitive{$content} || 0; | ||||
| 95 | } | ||||
| 96 | |||||
| 97 | } | ||||
| 98 | |||||
| 99 | { | ||||
| 100 | |||||
| 101 | 2 | 4µs | my %introduced = ( | ||
| 102 | '\h' => '5.009005', # Before this, parsed as 'h' | ||||
| 103 | '\v' => '5.009005', # Before this, parsed as 'v' | ||||
| 104 | '\H' => '5.009005', # Before this, parsed as 'H' | ||||
| 105 | '\N' => '5.011', # Before this, an error. | ||||
| 106 | '\V' => '5.009005', # Before this, parsed as 'V' | ||||
| 107 | '\R' => '5.009005', | ||||
| 108 | '\C' => '5.006', | ||||
| 109 | '\X' => '5.006', | ||||
| 110 | ); | ||||
| 111 | |||||
# Report the Perl version in which this token was introduced.
#
# The token's content is looked up in %introduced first. A token that is
# not listed there but starts with \p or \P yields '5.006001'. Everything
# else yields MINIMUM_PERL.
sub perl_version_introduced {
    my ( $self ) = @_;
    my $text = $self->content();

    my $listed = $introduced{$text};
    defined $listed
        and return $listed;

    $text =~ m/ \A \\ [Pp] /smxg
        or return MINIMUM_PERL;

    # Historical note from the author: a reading of perl5113delta once
    # suggested that property names containing whitespace, '=' or '-'
    # needed 5.011003, but that could not be confirmed afterwards, so
    # the check stays disabled until things become clearer:
    # $text =~ m/ \G .*? [\s=-] /smxgc
    #     and return '5.011003';
    return '5.006001';
}
| 128 | |||||
| 129 | } | ||||
| 130 | |||||
# Tokenizer hook: try to recognize a simple character class at the
# current position.
#
# Arguments are the invoking class, the tokenizer object, and the current
# character. The outcomes visible in this code are:
#   - for '.': a TOKEN_LITERAL token of length 1 when the COOKIE_CLASS
#     cookie is set, otherwise the number 1;
#   - for a backslash escape or \p{...}/\P{...} property matched by the
#     pattern below: the value returned by find_regexp(), except that
#     with the COOKIE_CLASS cookie set '\N' becomes a TOKEN_UNKNOWN token
#     carrying an error message and '\R' is declined;
#   - otherwise: an empty return.
sub __PPIX_TOKENIZER__regexp {
    my ( $class, $tokenizer, $character ) = @_;

    my $inside_class = $tokenizer->cookie( COOKIE_CLASS );

    if ( $character eq '.' ) {
        return $inside_class
            ? $tokenizer->make_token( 1, TOKEN_LITERAL )
            : 1;
    }

    my $accepted = $tokenizer->find_regexp(
        qr{ \A \\ (?:
            [wWsSdDvVhHXRNC] |
            [Pp] \{ \s* \^? [\w:=\s-]+ \}
        ) }smx
    ) or return;

    # Outside a bracketed character class there is nothing more to check.
    $inside_class
        or return $accepted;

    my $matched = $tokenizer->match();

    # As of Perl 5.11.5, [\N] is a fatal error.
    if ( $matched eq '\\N' ) {
        return $tokenizer->make_token(
            $accepted, TOKEN_UNKNOWN, {
                error => '\\N invalid inside character class',
            },
        );
    }

    # \R is not recognized inside a character class. It eventually ends
    # up as a literal, so decline it here.
    $matched eq '\\R'
        and return;

    return $accepted;
}
| 166 | |||||
| 167 | 1 | 9µs | 1; | ||
| 168 | |||||
| 169 | __END__ |