Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/site_perl/5.18.2/Pod/Wordlist.pm |
Statements | Executed 2457 statements in 5.10ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 9.55ms | 19.6ms | BEGIN@4 | Pod::Wordlist::
1 | 1 | 1 | 1.57ms | 11.5ms | BEGIN@5 | Pod::Wordlist::
1 | 1 | 1 | 1.25ms | 1.90ms | BEGIN@8 | Pod::Wordlist::
1 | 1 | 1 | 202µs | 727µs | BEGIN@816 | Path::Tiny::
1 | 1 | 1 | 11µs | 23µs | BEGIN@2 | Pod::Wordlist::
1 | 1 | 1 | 8µs | 48µs | BEGIN@14 | Pod::Wordlist::
1 | 1 | 1 | 7µs | 10µs | BEGIN@3 | Pod::Wordlist::
0 | 0 | 0 | 0s | 0s | _copy_wordlist | Pod::Wordlist::
0 | 0 | 0 | 0s | 0s | _strip_a_word | Pod::Wordlist::
0 | 0 | 0 | 0s | 0s | is_stopword | Pod::Wordlist::
0 | 0 | 0 | 0s | 0s | learn_stopwords | Pod::Wordlist::
0 | 0 | 0 | 0s | 0s | strip_stopwords | Pod::Wordlist::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
0 | 1 | 727µs | Profile data that couldn't be associated with a specific line: # spent 727µs making 1 call to Path::Tiny::BEGIN@816 | ||
1 | 1 | 24µs | package Pod::Wordlist; | ||
2 | 2 | 18µs | 2 | 34µs | # spent 23µs (11+11) within Pod::Wordlist::BEGIN@2 which was called:
# once (11µs+11µs) by Pod::Spell::BEGIN@10 at line 2 # spent 23µs making 1 call to Pod::Wordlist::BEGIN@2
# spent 11µs making 1 call to strict::import |
3 | 2 | 19µs | 2 | 14µs | # spent 10µs (7+4) within Pod::Wordlist::BEGIN@3 which was called:
# once (7µs+4µs) by Pod::Spell::BEGIN@10 at line 3 # spent 10µs making 1 call to Pod::Wordlist::BEGIN@3
# spent 4µs making 1 call to warnings::import |
4 | 2 | 103µs | 2 | 19.7ms | # spent 19.6ms (9.55+10.1) within Pod::Wordlist::BEGIN@4 which was called:
# once (9.55ms+10.1ms) by Pod::Spell::BEGIN@10 at line 4 # spent 19.6ms making 1 call to Pod::Wordlist::BEGIN@4
# spent 66µs making 1 call to Exporter::import |
5 | # spent 11.5ms (1.57+9.98) within Pod::Wordlist::BEGIN@5 which was called:
# once (1.57ms+9.98ms) by Pod::Spell::BEGIN@10 at line 6 | ||||
6 | 3 | 122µs | 3 | 11.6ms | dist_file => defaults => { pathtiny => 1 , strict => 1 }; # spent 11.5ms making 1 call to Pod::Wordlist::BEGIN@5
# spent 20µs making 1 call to File::ShareDir::ProjectDistDir::import
# spent 10µs making 1 call to UNIVERSAL::VERSION |
7 | |||||
8 | # spent 1.90ms (1.25+657µs) within Pod::Wordlist::BEGIN@8 which was called:
# once (1.25ms+657µs) by Pod::Spell::BEGIN@10 at line 12 | ||||
9 | 1 | 60µs | wordlist => \&_copy_wordlist, | ||
10 | _is_debug => 0, | ||||
11 | no_wide_chars => 0, | ||||
12 | 1 | 29µs | 2 | 2.17ms | }; # spent 1.90ms making 1 call to Pod::Wordlist::BEGIN@8
# spent 264µs making 1 call to Class::Tiny::import |
13 | |||||
14 | 2 | 628µs | 2 | 89µs | # spent 48µs (8+40) within Pod::Wordlist::BEGIN@14 which was called:
# once (8µs+40µs) by Pod::Spell::BEGIN@10 at line 14 # spent 48µs making 1 call to Pod::Wordlist::BEGIN@14
# spent 40µs making 1 call to constant::import |
15 | |||||
16 | 1 | 600ns | our $VERSION = '1.15'; # VERSION | ||
17 | |||||
18 | 1 | 100ns | our %Wordlist; ## no critic ( Variables::ProhibitPackageVars ) | ||
19 | |||||
20 | sub _copy_wordlist { return { %Wordlist } } | ||||
21 | |||||
22 | 1 | 218µs | 2 | 124ms | foreach ( dist_file('Pod-Spell', 'wordlist')->lines_utf8({ chomp => 1 })) { # spent 118ms making 1 call to File::ShareDir::ProjectDistDir::__ANON__[File/ShareDir/ProjectDistDir.pm:682]
# spent 6.07ms making 1 call to Path::Tiny::lines_utf8 |
23 | 1220 | 1.09ms | $Wordlist{$_} = 1; | ||
24 | 1220 | 2.78ms | 1220 | 675ms | $Wordlist{PL($_)} = 1; # spent 675ms making 1220 calls to Lingua::EN::Inflect::PL, avg 553µs/call |
25 | } | ||||
26 | |||||
27 | |||||
28 | sub learn_stopwords { | ||||
29 | my ( $self, $text ) = @_; | ||||
30 | my $stopwords = $self->wordlist; | ||||
31 | |||||
32 | while ( $text =~ m<(\S+)>g ) { | ||||
33 | my $word = $1; | ||||
34 | if ( $word =~ m/^!(.+)/s ) { | ||||
35 | # "!word" deletes from the stopword list | ||||
36 | my $negation = $1; | ||||
37 | # different $1 from above | ||||
38 | delete $stopwords->{$negation}; | ||||
39 | delete $stopwords->{PL($negation)}; | ||||
40 | print "Unlearning stopword <$negation>\n" if $self->_is_debug; | ||||
41 | } | ||||
42 | else { | ||||
43 | $word =~ s{'s$}{}; # we strip 's when checking so strip here, too | ||||
44 | $stopwords->{$word} = 1; | ||||
45 | $stopwords->{PL($word)} = 1; | ||||
46 | print "Learning stopword <$word>\n" if $self->_is_debug; | ||||
47 | } | ||||
48 | } | ||||
49 | return; | ||||
50 | } | ||||
51 | |||||
52 | |||||
53 | sub is_stopword { | ||||
54 | my ($self, $word) = @_; | ||||
55 | my $stopwords = $self->wordlist; | ||||
56 | if ( exists $stopwords->{$word} or exists $stopwords->{ lc $word } ) { | ||||
57 | print " Rejecting <$word>\n" if $self->_is_debug; | ||||
58 | return 1; | ||||
59 | } | ||||
60 | return; | ||||
61 | } | ||||
62 | |||||
63 | |||||
64 | sub strip_stopwords { | ||||
65 | my ($self, $text) = @_; | ||||
66 | |||||
67 | # Count the things in $text | ||||
68 | print "Content: <", $text, ">\n" if $self->_is_debug; | ||||
69 | |||||
70 | my @words = grep { length($_) < MAXWORDLENGTH } split " ", $text; | ||||
71 | |||||
72 | for ( @words ) { | ||||
73 | print "Parsing word: <$_>\n" if $self->_is_debug; | ||||
74 | # some spellcheckers can't cope with anything but Latin1 | ||||
75 | $_ = '' if $self->no_wide_chars && /[^\x00-\xFF]/; | ||||
76 | |||||
77 | # strip leading punctuation | ||||
78 | s/^[\(\[\{\'\"\:\;\,\?\!\.]+//; | ||||
79 | |||||
80 | # keep everything up to trailing punctuation, not counting | ||||
81 | # periods (for abbreviations like "Ph.D."), single-quotes | ||||
82 | # (for contractions like "don't") or colons (for package | ||||
83 | # names like "Foo::Bar") | ||||
84 | s/^([^\)\]\}\"\;\,\?\!]+).*$/$1/; | ||||
85 | |||||
86 | # strip trailing single-quote, periods or colons; after this | ||||
87 | # we have a word that could have internal periods or quotes | ||||
88 | s/[\.\'\:]+$//; | ||||
89 | |||||
90 | # strip possessive | ||||
91 | s/'s$//i; | ||||
92 | |||||
93 | # zero out variable names or things with internal symbols, | ||||
94 | # since those are probably code expressions outside a C<> | ||||
95 | my $is_sigil = /^[\&\%\$\@\:\<\*\\\_]/; | ||||
96 | my $is_strange = /[\%\^\&\#\$\@\_\<\>\(\)\[\]\{\}\\\*\:\+\/\=\|\`\~]/; | ||||
97 | $_ = '' if $is_sigil || $is_strange; | ||||
98 | |||||
99 | # stop if there are no "word" characters left; if it's just | ||||
100 | # punctuation that we didn't happen to strip or it's weird glyphs, | ||||
101 | # the spellchecker won't do any good anyway | ||||
102 | next unless /\w/; | ||||
103 | |||||
104 | print " Checking as <$_>\n" if $self->_is_debug; | ||||
105 | |||||
106 | # replace it with any stopword or stopword parts stripped | ||||
107 | $_ = $self->_strip_a_word($_); | ||||
108 | |||||
109 | print " Keeping as <$_>\n" if $_ && $self->_is_debug; | ||||
110 | } | ||||
111 | |||||
112 | return join(" ", grep { length } @words ); | ||||
113 | } | ||||
114 | |||||
115 | sub _strip_a_word { | ||||
116 | my ($self, $word) = @_; | ||||
117 | my $remainder; | ||||
118 | |||||
119 | # internal period could be abbreviations, so check with | ||||
120 | # trailing period restored and drop or keep on that basis | ||||
121 | if ( /\./ ) { | ||||
122 | my $abbr = "$word."; | ||||
123 | $remainder = $self->is_stopword($abbr) ? '' : $abbr; | ||||
124 | } | ||||
125 | # try word as-is, including possible hyphenation vs stoplist | ||||
126 | elsif ($self->is_stopword($word) ) { | ||||
127 | $remainder = ''; | ||||
128 | } | ||||
129 | # check individual parts of hyphenated word, keep whatever isn't a | ||||
130 | # stopword as individual words | ||||
131 | elsif ( $word =~ /-/ ) { | ||||
132 | my @keep; | ||||
133 | for my $part ( split /-/, $word ) { | ||||
134 | push @keep, $part if ! $self->is_stopword( $part ); | ||||
135 | } | ||||
136 | $remainder = join(" ", @keep) if @keep; | ||||
137 | } | ||||
138 | # otherwise, we just keep it | ||||
139 | else { | ||||
140 | $remainder = $word; | ||||
141 | } | ||||
142 | return $remainder; | ||||
143 | } | ||||
144 | |||||
145 | 1 | 10µs | 1; | ||
146 | |||||
147 | # ABSTRACT: English words that come up in Perl documentation | ||||
148 | |||||
149 | __END__ | ||||
150 | |||||
- - |