← Index
NYTProf Performance Profile   « line view »
For /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/bin/perlcritic
  Run on Sat Mar 19 22:12:22 2016
Reported on Sat Mar 19 22:14:12 2016

Filename/Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/Pod/Wordlist.pm
StatementsExecuted 2457 statements in 5.10ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1119.55ms19.6msPod::Wordlist::::BEGIN@4Pod::Wordlist::BEGIN@4
1111.57ms11.5msPod::Wordlist::::BEGIN@5Pod::Wordlist::BEGIN@5
1111.25ms1.90msPod::Wordlist::::BEGIN@8Pod::Wordlist::BEGIN@8
111202µs727µsPath::Tiny::::BEGIN@816 Path::Tiny::BEGIN@816
11111µs23µsPod::Wordlist::::BEGIN@2Pod::Wordlist::BEGIN@2
1118µs48µsPod::Wordlist::::BEGIN@14Pod::Wordlist::BEGIN@14
1117µs10µsPod::Wordlist::::BEGIN@3Pod::Wordlist::BEGIN@3
0000s0sPod::Wordlist::::_copy_wordlistPod::Wordlist::_copy_wordlist
0000s0sPod::Wordlist::::_strip_a_wordPod::Wordlist::_strip_a_word
0000s0sPod::Wordlist::::is_stopwordPod::Wordlist::is_stopword
0000s0sPod::Wordlist::::learn_stopwordsPod::Wordlist::learn_stopwords
0000s0sPod::Wordlist::::strip_stopwordsPod::Wordlist::strip_stopwords
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
01727µsProfile data that couldn't be associated with a specific line:
# spent 727µs making 1 call to Path::Tiny::BEGIN@816
1124µspackage Pod::Wordlist;
2218µs234µs
# spent 23µs (11+11) within Pod::Wordlist::BEGIN@2 which was called: # once (11µs+11µs) by Pod::Spell::BEGIN@10 at line 2
use strict;
# spent 23µs making 1 call to Pod::Wordlist::BEGIN@2 # spent 11µs making 1 call to strict::import
3219µs214µs
# spent 10µs (7+4) within Pod::Wordlist::BEGIN@3 which was called: # once (7µs+4µs) by Pod::Spell::BEGIN@10 at line 3
use warnings;
# spent 10µs making 1 call to Pod::Wordlist::BEGIN@3 # spent 4µs making 1 call to warnings::import
42103µs219.7ms
# spent 19.6ms (9.55+10.1) within Pod::Wordlist::BEGIN@4 which was called: # once (9.55ms+10.1ms) by Pod::Spell::BEGIN@10 at line 4
use Lingua::EN::Inflect 'PL';
# spent 19.6ms making 1 call to Pod::Wordlist::BEGIN@4 # spent 66µs making 1 call to Exporter::import
5
# spent 11.5ms (1.57+9.98) within Pod::Wordlist::BEGIN@5 which was called: # once (1.57ms+9.98ms) by Pod::Spell::BEGIN@10 at line 6
use File::ShareDir::ProjectDistDir 1.000
63122µs311.6ms dist_file => defaults => { pathtiny => 1 , strict => 1 };
# spent 11.5ms making 1 call to Pod::Wordlist::BEGIN@5 # spent 20µs making 1 call to File::ShareDir::ProjectDistDir::import # spent 10µs making 1 call to UNIVERSAL::VERSION
7
8
# spent 1.90ms (1.25+657µs) within Pod::Wordlist::BEGIN@8 which was called: # once (1.25ms+657µs) by Pod::Spell::BEGIN@10 at line 12
use Class::Tiny {
9160µs wordlist => \&_copy_wordlist,
10 _is_debug => 0,
11 no_wide_chars => 0,
12129µs22.17ms};
# spent 1.90ms making 1 call to Pod::Wordlist::BEGIN@8 # spent 264µs making 1 call to Class::Tiny::import
13
142628µs289µs
# spent 48µs (8+40) within Pod::Wordlist::BEGIN@14 which was called: # once (8µs+40µs) by Pod::Spell::BEGIN@10 at line 14
use constant MAXWORDLENGTH => 50; ## no critic ( ProhibitConstantPragma )
# spent 48µs making 1 call to Pod::Wordlist::BEGIN@14 # spent 40µs making 1 call to constant::import
15
161600nsour $VERSION = '1.15'; # VERSION
17
181100nsour %Wordlist; ## no critic ( Variables::ProhibitPackageVars )
19
20sub _copy_wordlist { return { %Wordlist } }
21
221218µs2124msforeach ( dist_file('Pod-Spell', 'wordlist')->lines_utf8({ chomp => 1 })) {
2312201.09ms $Wordlist{$_} = 1;
2412202.78ms1220675ms $Wordlist{PL($_)} = 1;
# spent 675ms making 1220 calls to Lingua::EN::Inflect::PL, avg 553µs/call
25}
26
27
28sub learn_stopwords {
29 my ( $self, $text ) = @_;
30 my $stopwords = $self->wordlist;
31
32 while ( $text =~ m<(\S+)>g ) {
33 my $word = $1;
34 if ( $word =~ m/^!(.+)/s ) {
35 # "!word" deletes from the stopword list
36 my $negation = $1;
37 # different $1 from above
38 delete $stopwords->{$negation};
39 delete $stopwords->{PL($negation)};
40 print "Unlearning stopword <$negation>\n" if $self->_is_debug;
41 }
42 else {
43 $word =~ s{'s$}{}; # we strip 's when checking so strip here, too
44 $stopwords->{$word} = 1;
45 $stopwords->{PL($word)} = 1;
46 print "Learning stopword <$word>\n" if $self->_is_debug;
47 }
48 }
49 return;
50}
51
52
53sub is_stopword {
54 my ($self, $word) = @_;
55 my $stopwords = $self->wordlist;
56 if ( exists $stopwords->{$word} or exists $stopwords->{ lc $word } ) {
57 print " Rejecting <$word>\n" if $self->_is_debug;
58 return 1;
59 }
60 return;
61}
62
63
64sub strip_stopwords {
65 my ($self, $text) = @_;
66
67 # Count the things in $text
68 print "Content: <", $text, ">\n" if $self->_is_debug;
69
70 my @words = grep { length($_) < MAXWORDLENGTH } split " ", $text;
71
72 for ( @words ) {
73 print "Parsing word: <$_>\n" if $self->_is_debug;
74 # some spellcheckers can't cope with anything but Latin1
75 $_ = '' if $self->no_wide_chars && /[^\x00-\xFF]/;
76
77 # strip leading punctuation
78 s/^[\(\[\{\'\"\:\;\,\?\!\.]+//;
79
80 # keep everything up to trailing punctuation, not counting
81 # periods (for abbreviations like "Ph.D."), single-quotes
82 # (for contractions like "don't") or colons (for package
83 # names like "Foo::Bar")
84 s/^([^\)\]\}\"\;\,\?\!]+).*$/$1/;
85
86 # strip trailing single-quote, periods or colons; after this
87 # we have a word that could have internal periods or quotes
88 s/[\.\'\:]+$//;
89
90 # strip possessive
91 s/'s$//i;
92
93 # zero out variable names or things with internal symbols,
94 # since those are probably code expressions outside a C<>
95 my $is_sigil = /^[\&\%\$\@\:\<\*\\\_]/;
96 my $is_strange = /[\%\^\&\#\$\@\_\<\>\(\)\[\]\{\}\\\*\:\+\/\=\|\`\~]/;
97 $_ = '' if $is_sigil || $is_strange;
98
99 # stop if there are no "word" characters left; if it's just
100 # punctuation that we didn't happen to strip or it's weird glyphs,
101 # the spellchecker won't do any good anyway
102 next unless /\w/;
103
104 print " Checking as <$_>\n" if $self->_is_debug;
105
106 # replace it with any stopword or stopword parts stripped
107 $_ = $self->_strip_a_word($_);
108
109 print " Keeping as <$_>\n" if $_ && $self->_is_debug;
110 }
111
112 return join(" ", grep { length } @words );
113}
114
115sub _strip_a_word {
116 my ($self, $word) = @_;
117 my $remainder;
118
119 # internal period could be abbreviations, so check with
120 # trailing period restored and drop or keep on that basis
121 if ( /\./ ) {
122 my $abbr = "$word.";
123 $remainder = $self->is_stopword($abbr) ? '' : $abbr;
124 }
125 # try word as-is, including possible hyphenation vs stoplist
126 elsif ($self->is_stopword($word) ) {
127 $remainder = '';
128 }
129 # check individual parts of hyphenated word, keep whatever isn't a
130 # stopword as individual words
131 elsif ( $word =~ /-/ ) {
132 my @keep;
133 for my $part ( split /-/, $word ) {
134 push @keep, $part if ! $self->is_stopword( $part );
135 }
136 $remainder = join(" ", @keep) if @keep;
137 }
138 # otherwise, we just keep it
139 else {
140 $remainder = $word;
141 }
142 return $remainder;
143}
144
145110µs1;
146
147# ABSTRACT: English words that come up in Perl documentation
148
149__END__
150
- -