| Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/Pod/Wordlist.pm |
| Statements | Executed 2457 statements in 5.10ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 9.55ms | 19.6ms | Pod::Wordlist::BEGIN@4 |
| 1 | 1 | 1 | 1.57ms | 11.5ms | Pod::Wordlist::BEGIN@5 |
| 1 | 1 | 1 | 1.25ms | 1.90ms | Pod::Wordlist::BEGIN@8 |
| 1 | 1 | 1 | 202µs | 727µs | Path::Tiny::BEGIN@816 |
| 1 | 1 | 1 | 11µs | 23µs | Pod::Wordlist::BEGIN@2 |
| 1 | 1 | 1 | 8µs | 48µs | Pod::Wordlist::BEGIN@14 |
| 1 | 1 | 1 | 7µs | 10µs | Pod::Wordlist::BEGIN@3 |
| 0 | 0 | 0 | 0s | 0s | Pod::Wordlist::_copy_wordlist |
| 0 | 0 | 0 | 0s | 0s | Pod::Wordlist::_strip_a_word |
| 0 | 0 | 0 | 0s | 0s | Pod::Wordlist::is_stopword |
| 0 | 0 | 0 | 0s | 0s | Pod::Wordlist::learn_stopwords |
| 0 | 0 | 0 | 0s | 0s | Pod::Wordlist::strip_stopwords |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 0 | 1 | 727µs | Profile data that couldn't be associated with a specific line: # spent 727µs making 1 call to Path::Tiny::BEGIN@816 | ||
| 1 | 1 | 24µs | package Pod::Wordlist; | ||
| 2 | 2 | 18µs | 2 | 34µs | # spent 23µs (11+11) within Pod::Wordlist::BEGIN@2 which was called:
# once (11µs+11µs) by Pod::Spell::BEGIN@10 at line 2 # spent 23µs making 1 call to Pod::Wordlist::BEGIN@2
# spent 11µs making 1 call to strict::import |
| 3 | 2 | 19µs | 2 | 14µs | # spent 10µs (7+4) within Pod::Wordlist::BEGIN@3 which was called:
# once (7µs+4µs) by Pod::Spell::BEGIN@10 at line 3 # spent 10µs making 1 call to Pod::Wordlist::BEGIN@3
# spent 4µs making 1 call to warnings::import |
| 4 | 2 | 103µs | 2 | 19.7ms | # spent 19.6ms (9.55+10.1) within Pod::Wordlist::BEGIN@4 which was called:
# once (9.55ms+10.1ms) by Pod::Spell::BEGIN@10 at line 4 # spent 19.6ms making 1 call to Pod::Wordlist::BEGIN@4
# spent 66µs making 1 call to Exporter::import |
| 5 | # spent 11.5ms (1.57+9.98) within Pod::Wordlist::BEGIN@5 which was called:
# once (1.57ms+9.98ms) by Pod::Spell::BEGIN@10 at line 6 | ||||
| 6 | 3 | 122µs | 3 | 11.6ms | dist_file => defaults => { pathtiny => 1 , strict => 1 }; # spent 11.5ms making 1 call to Pod::Wordlist::BEGIN@5
# spent 20µs making 1 call to File::ShareDir::ProjectDistDir::import
# spent 10µs making 1 call to UNIVERSAL::VERSION |
| 7 | |||||
| 8 | # spent 1.90ms (1.25+657µs) within Pod::Wordlist::BEGIN@8 which was called:
# once (1.25ms+657µs) by Pod::Spell::BEGIN@10 at line 12 | ||||
| 9 | 1 | 60µs | wordlist => \&_copy_wordlist, | ||
| 10 | _is_debug => 0, | ||||
| 11 | no_wide_chars => 0, | ||||
| 12 | 1 | 29µs | 2 | 2.17ms | }; # spent 1.90ms making 1 call to Pod::Wordlist::BEGIN@8
# spent 264µs making 1 call to Class::Tiny::import |
| 13 | |||||
| 14 | 2 | 628µs | 2 | 89µs | # spent 48µs (8+40) within Pod::Wordlist::BEGIN@14 which was called:
# once (8µs+40µs) by Pod::Spell::BEGIN@10 at line 14 # spent 48µs making 1 call to Pod::Wordlist::BEGIN@14
# spent 40µs making 1 call to constant::import |
| 15 | |||||
| 16 | 1 | 600ns | our $VERSION = '1.15'; # VERSION | ||
| 17 | |||||
| 18 | 1 | 100ns | our %Wordlist; ## no critic ( Variables::ProhibitPackageVars ) | ||
| 19 | |||||
| 20 | sub _copy_wordlist { return { %Wordlist } } | ||||
| 21 | |||||
| 22 | 1 | 218µs | 2 | 124ms | foreach ( dist_file('Pod-Spell', 'wordlist')->lines_utf8({ chomp => 1 })) { # spent 118ms making 1 call to File::ShareDir::ProjectDistDir::__ANON__[File/ShareDir/ProjectDistDir.pm:682]
# spent 6.07ms making 1 call to Path::Tiny::lines_utf8 |
| 23 | 1220 | 1.09ms | $Wordlist{$_} = 1; | ||
| 24 | 1220 | 2.78ms | 1220 | 675ms | $Wordlist{PL($_)} = 1; # spent 675ms making 1220 calls to Lingua::EN::Inflect::PL, avg 553µs/call |
| 25 | } | ||||
| 26 | |||||
| 27 | |||||
| 28 | sub learn_stopwords { | ||||
| 29 | my ( $self, $text ) = @_; | ||||
| 30 | my $stopwords = $self->wordlist; | ||||
| 31 | |||||
| 32 | while ( $text =~ m<(\S+)>g ) { | ||||
| 33 | my $word = $1; | ||||
| 34 | if ( $word =~ m/^!(.+)/s ) { | ||||
| 35 | # "!word" deletes from the stopword list | ||||
| 36 | my $negation = $1; | ||||
| 37 | # different $1 from above | ||||
| 38 | delete $stopwords->{$negation}; | ||||
| 39 | delete $stopwords->{PL($negation)}; | ||||
| 40 | print "Unlearning stopword <$negation>\n" if $self->_is_debug; | ||||
| 41 | } | ||||
| 42 | else { | ||||
| 43 | $word =~ s{'s$}{}; # we strip 's when checking so strip here, too | ||||
| 44 | $stopwords->{$word} = 1; | ||||
| 45 | $stopwords->{PL($word)} = 1; | ||||
| 46 | print "Learning stopword <$word>\n" if $self->_is_debug; | ||||
| 47 | } | ||||
| 48 | } | ||||
| 49 | return; | ||||
| 50 | } | ||||
| 51 | |||||
| 52 | |||||
| 53 | sub is_stopword { | ||||
| 54 | my ($self, $word) = @_; | ||||
| 55 | my $stopwords = $self->wordlist; | ||||
| 56 | if ( exists $stopwords->{$word} or exists $stopwords->{ lc $word } ) { | ||||
| 57 | print " Rejecting <$word>\n" if $self->_is_debug; | ||||
| 58 | return 1; | ||||
| 59 | } | ||||
| 60 | return; | ||||
| 61 | } | ||||
| 62 | |||||
| 63 | |||||
| 64 | sub strip_stopwords { | ||||
| 65 | my ($self, $text) = @_; | ||||
| 66 | |||||
| 67 | # Count the things in $text | ||||
| 68 | print "Content: <", $text, ">\n" if $self->_is_debug; | ||||
| 69 | |||||
| 70 | my @words = grep { length($_) < MAXWORDLENGTH } split " ", $text; | ||||
| 71 | |||||
| 72 | for ( @words ) { | ||||
| 73 | print "Parsing word: <$_>\n" if $self->_is_debug; | ||||
| 74 | # some spellcheckers can't cope with anything but Latin1 | ||||
| 75 | $_ = '' if $self->no_wide_chars && /[^\x00-\xFF]/; | ||||
| 76 | |||||
| 77 | # strip leading punctuation | ||||
| 78 | s/^[\(\[\{\'\"\:\;\,\?\!\.]+//; | ||||
| 79 | |||||
| 80 | # keep everything up to trailing punctuation, not counting | ||||
| 81 | # periods (for abbreviations like "Ph.D."), single-quotes | ||||
| 82 | # (for contractions like "don't") or colons (for package | ||||
| 83 | # names like "Foo::Bar") | ||||
| 84 | s/^([^\)\]\}\"\;\,\?\!]+).*$/$1/; | ||||
| 85 | |||||
| 86 | # strip trailing single-quote, periods or colons; after this | ||||
| 87 | # we have a word that could have internal periods or quotes | ||||
| 88 | s/[\.\'\:]+$//; | ||||
| 89 | |||||
| 90 | # strip possessive | ||||
| 91 | s/'s$//i; | ||||
| 92 | |||||
| 93 | # zero out variable names or things with internal symbols, | ||||
| 94 | # since those are probably code expressions outside a C<> | ||||
| 95 | my $is_sigil = /^[\&\%\$\@\:\<\*\\\_]/; | ||||
| 96 | my $is_strange = /[\%\^\&\#\$\@\_\<\>\(\)\[\]\{\}\\\*\:\+\/\=\|\`\~]/; | ||||
| 97 | $_ = '' if $is_sigil || $is_strange; | ||||
| 98 | |||||
| 99 | # stop if there are no "word" characters left; if it's just | ||||
| 100 | # punctuation that we didn't happen to strip or it's weird glyphs, | ||||
| 101 | # the spellchecker won't do any good anyway | ||||
| 102 | next unless /\w/; | ||||
| 103 | |||||
| 104 | print " Checking as <$_>\n" if $self->_is_debug; | ||||
| 105 | |||||
| 106 | # replace it with any stopword or stopword parts stripped | ||||
| 107 | $_ = $self->_strip_a_word($_); | ||||
| 108 | |||||
| 109 | print " Keeping as <$_>\n" if $_ && $self->_is_debug; | ||||
| 110 | } | ||||
| 111 | |||||
| 112 | return join(" ", grep { length } @words ); | ||||
| 113 | } | ||||
| 114 | |||||
| 115 | sub _strip_a_word { | ||||
| 116 | my ($self, $word) = @_; | ||||
| 117 | my $remainder; | ||||
| 118 | |||||
| 119 | # internal period could be abbreviations, so check with | ||||
| 120 | # trailing period restored and drop or keep on that basis | ||||
| 121 | if ( /\./ ) { | ||||
| 122 | my $abbr = "$word."; | ||||
| 123 | $remainder = $self->is_stopword($abbr) ? '' : $abbr; | ||||
| 124 | } | ||||
| 125 | # try word as-is, including possible hyphenation vs stoplist | ||||
| 126 | elsif ($self->is_stopword($word) ) { | ||||
| 127 | $remainder = ''; | ||||
| 128 | } | ||||
| 129 | # check individual parts of hyphenated word, keep whatever isn't a | ||||
| 130 | # stopword as individual words | ||||
| 131 | elsif ( $word =~ /-/ ) { | ||||
| 132 | my @keep; | ||||
| 133 | for my $part ( split /-/, $word ) { | ||||
| 134 | push @keep, $part if ! $self->is_stopword( $part ); | ||||
| 135 | } | ||||
| 136 | $remainder = join(" ", @keep) if @keep; | ||||
| 137 | } | ||||
| 138 | # otherwise, we just keep it | ||||
| 139 | else { | ||||
| 140 | $remainder = $word; | ||||
| 141 | } | ||||
| 142 | return $remainder; | ||||
| 143 | } | ||||
| 144 | |||||
| 145 | 1 | 10µs | 1; | ||
| 146 | |||||
| 147 | # ABSTRACT: English words that come up in Perl documentation | ||||
| 148 | |||||
| 149 | __END__ | ||||
| 150 | |||||
| - - |