Filename | /Users/timbo/perl5/perlbrew/perls/perl-5.18.2/lib/5.18.2/Pod/Simple/BlackBox.pm |
Statements | Executed 17 statements in 7.67ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 13µs | 14µs | BEGIN@22 | Pod::Simple::BlackBox::
1 | 1 | 1 | 11µs | 14µs | BEGIN@1183 | Pod::Simple::BlackBox::
1 | 1 | 1 | 6µs | 23µs | BEGIN@25 | Pod::Simple::BlackBox::
1 | 1 | 1 | 6µs | 16µs | BEGIN@23 | Pod::Simple::BlackBox::
1 | 1 | 1 | 4µs | 4µs | BEGIN@28 | Pod::Simple::BlackBox::
1 | 1 | 1 | 3µs | 3µs | BEGIN@24 | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _closers_for_all_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _dump_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _gen_errata | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_second_level | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Data | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Plain | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Verbatim | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_back | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_begin | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_doc_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_for | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_item | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_over | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_paragraph_buffer | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_pod | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _traverse_treelet_bit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _treelet_from_formatting_codes | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _verbatim_format | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_lines | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | pretty | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | reinit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | text_content_of_treelet | Pod::Simple::BlackBox::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | |||||
2 | package Pod::Simple::BlackBox; | ||||
3 | # | ||||
4 | # "What's in the box?" "Pain." | ||||
5 | # | ||||
6 | ########################################################################### | ||||
7 | # | ||||
8 | # This is where all the scary things happen: parsing lines into | ||||
9 | # paragraphs; and then into directives, verbatims, and then also | ||||
10 | # turning formatting sequences into treelets. | ||||
11 | # | ||||
12 | # Are you really sure you want to read this code? | ||||
13 | # | ||||
14 | #----------------------------------------------------------------------------- | ||||
15 | # | ||||
16 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | ||||
17 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | ||||
18 | # to call the proper callbacks on the treelets. | ||||
19 | # | ||||
20 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | ||||
21 | |||||
22 | 2 | 19µs | 2 | 16µs | # spent 14µs (13+2) within Pod::Simple::BlackBox::BEGIN@22 which was called:
# once (13µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@22
# spent 2µs making 1 call to integer::import |
23 | 2 | 18µs | 2 | 27µs | # spent 16µs (6+10) within Pod::Simple::BlackBox::BEGIN@23 which was called:
# once (6µs+10µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23 # spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
# spent 10µs making 1 call to strict::import |
24 | 2 | 18µs | 1 | 3µs | # spent 3µs within Pod::Simple::BlackBox::BEGIN@24 which was called:
# once (3µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24 # spent 3µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 |
25 | 2 | 37µs | 2 | 40µs | # spent 23µs (6+17) within Pod::Simple::BlackBox::BEGIN@25 which was called:
# once (6µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25 # spent 23µs making 1 call to Pod::Simple::BlackBox::BEGIN@25
# spent 17µs making 1 call to vars::import |
26 | 1 | 800ns | $VERSION = '3.28'; | ||
27 | #use constant DEBUG => 7; | ||||
28 | # spent 4µs within Pod::Simple::BlackBox::BEGIN@28 which was called:
# once (4µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31 | ||||
29 | 1 | 200ns | require Pod::Simple; | ||
30 | 1 | 4µs | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | ||
31 | 1 | 4.30ms | 1 | 4µs | } # spent 4µs making 1 call to Pod::Simple::BlackBox::BEGIN@28 |
32 | |||||
33 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
34 | |||||
35 | sub parse_line { shift->parse_lines(@_) } # alias | ||||
36 | |||||
37 | # - - - Turn back now! Run away! - - - | ||||
38 | |||||
39 | sub parse_lines { # Usage: $parser->parse_lines(@lines) | ||||
40 | # an undef means end-of-stream | ||||
41 | my $self = shift; | ||||
42 | |||||
43 | my $code_handler = $self->{'code_handler'}; | ||||
44 | my $cut_handler = $self->{'cut_handler'}; | ||||
45 | my $wl_handler = $self->{'whiteline_handler'}; | ||||
46 | $self->{'line_count'} ||= 0; | ||||
47 | |||||
48 | my $scratch; | ||||
49 | |||||
50 | DEBUG > 4 and | ||||
51 | print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; | ||||
52 | |||||
53 | DEBUG > 5 and | ||||
54 | print "# About to parse lines: ", | ||||
55 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; | ||||
56 | |||||
57 | my $paras = ($self->{'paras'} ||= []); | ||||
58 | # paragraph buffer. Because we need to defer processing of =over | ||||
59 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer | ||||
60 | # to process this. | ||||
61 | |||||
62 | $self->{'pod_para_count'} ||= 0; | ||||
63 | |||||
64 | my $line; | ||||
65 | foreach my $source_line (@_) { | ||||
66 | if( $self->{'source_dead'} ) { | ||||
67 | DEBUG > 4 and print "# Source is dead.\n"; | ||||
68 | last; | ||||
69 | } | ||||
70 | |||||
71 | unless( defined $source_line ) { | ||||
72 | DEBUG > 4 and print "# Undef-line seen.\n"; | ||||
73 | |||||
74 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; | ||||
75 | push @$paras, $paras->[-1], $paras->[-1]; | ||||
76 | # So that it definitely fills the buffer. | ||||
77 | $self->{'source_dead'} = 1; | ||||
78 | $self->_ponder_paragraph_buffer; | ||||
79 | next; | ||||
80 | } | ||||
81 | |||||
82 | |||||
83 | if( $self->{'line_count'}++ ) { | ||||
84 | ($line = $source_line) =~ tr/\n\r//d; | ||||
85 | # If we don't have two vars, we'll end up with that there | ||||
86 | # tr/// modding the (potentially read-only) original source line! | ||||
87 | |||||
88 | } else { | ||||
89 | DEBUG > 2 and print "First line: [$source_line]\n"; | ||||
90 | |||||
91 | if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { | ||||
92 | DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; | ||||
93 | $self->_handle_encoding_line( "=encoding utf8" ); | ||||
94 | delete $self->{'_processed_encoding'}; | ||||
95 | $line =~ tr/\n\r//d; | ||||
96 | |||||
97 | } elsif( $line =~ s/^\xFE\xFF//s ) { | ||||
98 | DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
99 | $self->scream( | ||||
100 | $self->{'line_count'}, | ||||
101 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
102 | ); | ||||
103 | splice @_; | ||||
104 | push @_, undef; | ||||
105 | next; | ||||
106 | |||||
107 | # TODO: implement somehow? | ||||
108 | |||||
109 | } elsif( $line =~ s/^\xFF\xFE//s ) { | ||||
110 | DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
111 | $self->scream( | ||||
112 | $self->{'line_count'}, | ||||
113 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
114 | ); | ||||
115 | splice @_; | ||||
116 | push @_, undef; | ||||
117 | next; | ||||
118 | |||||
119 | # TODO: implement somehow? | ||||
120 | |||||
121 | } else { | ||||
122 | DEBUG > 2 and print "First line is BOM-less.\n"; | ||||
123 | ($line = $source_line) =~ tr/\n\r//d; | ||||
124 | } | ||||
125 | } | ||||
126 | |||||
127 | # Try to guess encoding. Inlined for performance reasons. | ||||
128 | if(!$self->{'parse_characters'} && !$self->{'encoding'} | ||||
129 | && ($self->{'in_pod'} || $line =~ /^=/s) | ||||
130 | && $line =~ /[^\x00-\x7f]/ | ||||
131 | ) { | ||||
132 | my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1'; | ||||
133 | $self->_handle_encoding_line( "=encoding $encoding" ); | ||||
134 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
135 | |||||
136 | my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/; | ||||
137 | |||||
138 | $self->whine( | ||||
139 | $self->{'line_count'}, | ||||
140 | "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" | ||||
141 | ); | ||||
142 | } | ||||
143 | |||||
144 | DEBUG > 5 and print "# Parsing line: [$line]\n"; | ||||
145 | |||||
146 | if(!$self->{'in_pod'}) { | ||||
147 | if($line =~ m/^=([a-zA-Z]+)/s) { | ||||
148 | if($1 eq 'cut') { | ||||
149 | $self->scream( | ||||
150 | $self->{'line_count'}, | ||||
151 | "=cut found outside a pod block. Skipping to next block." | ||||
152 | ); | ||||
153 | |||||
154 | ## Before there were errata sections in the world, it was | ||||
155 | ## least-pessimal to abort processing the file. But now we can | ||||
156 | ## just barrel on thru (but still not start a pod block). | ||||
157 | #splice @_; | ||||
158 | #push @_, undef; | ||||
159 | |||||
160 | next; | ||||
161 | } else { | ||||
162 | $self->{'in_pod'} = $self->{'start_of_pod_block'} | ||||
163 | = $self->{'last_was_blank'} = 1; | ||||
164 | # And fall thru to the pod-mode block further down | ||||
165 | } | ||||
166 | } else { | ||||
167 | DEBUG > 5 and print "# It's a code-line.\n"; | ||||
168 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
169 | if $code_handler; | ||||
170 | # Note: this may cause code to be processed out of order relative | ||||
171 | # to pods, but in order relative to cuts. | ||||
172 | |||||
173 | # Note also that we haven't yet applied the transcoding to $line | ||||
174 | # by time we call $code_handler! | ||||
175 | |||||
176 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { | ||||
177 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", | ||||
178 | #$fname = $2 if defined $2; | ||||
179 | #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; | ||||
180 | DEBUG > 1 and print "# Setting nextline to $1\n"; | ||||
181 | $self->{'line_count'} = $1 - 1; | ||||
182 | } | ||||
183 | |||||
184 | next; | ||||
185 | } | ||||
186 | } | ||||
187 | |||||
188 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||||
189 | # Else we're in pod mode: | ||||
190 | |||||
191 | # Apply any necessary transcoding: | ||||
192 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
193 | |||||
194 | # HERE WE CATCH =encoding EARLY! | ||||
195 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { | ||||
196 | next if $self->parse_characters; # Ignore this line | ||||
197 | $line = $self->_handle_encoding_line( $line ); | ||||
198 | } | ||||
199 | |||||
200 | if($line =~ m/^=cut/s) { | ||||
201 | # here ends the pod block, and therefore the previous pod para | ||||
202 | DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; | ||||
203 | $self->{'in_pod'} = 0; | ||||
204 | # ++$self->{'pod_para_count'}; | ||||
205 | $self->_ponder_paragraph_buffer(); | ||||
206 | # by now it's safe to consider the previous paragraph as done. | ||||
207 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
208 | if $cut_handler; | ||||
209 | |||||
210 | # TODO: add to docs: Note: this may cause cuts to be processed out | ||||
211 | # of order relative to pods, but in order relative to code. | ||||
212 | |||||
213 | } elsif($line =~ m/^(\s*)$/s) { # it's a blank line | ||||
214 | if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line | ||||
215 | $wl_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
216 | if $wl_handler; | ||||
217 | } | ||||
218 | |||||
219 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
220 | DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; | ||||
221 | push @{$paras->[-1]}, $line; | ||||
222 | } # otherwise it's not interesting | ||||
223 | |||||
224 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { | ||||
225 | DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; | ||||
226 | } | ||||
227 | |||||
228 | $self->{'last_was_blank'} = 1; | ||||
229 | |||||
230 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... | ||||
231 | |||||
232 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { | ||||
233 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS | ||||
234 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; | ||||
235 | # Note that in "=head1 foo", the WS is lost. | ||||
236 | # Example: ['=head1', {'start_line' => 123}, ' foo'] | ||||
237 | |||||
238 | ++$self->{'pod_para_count'}; | ||||
239 | |||||
240 | $self->_ponder_paragraph_buffer(); | ||||
241 | # by now it's safe to consider the previous paragraph as done. | ||||
242 | |||||
243 | push @$paras, $new; # the new incipient paragraph | ||||
244 | DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; | ||||
245 | |||||
246 | } elsif($line =~ m/^\s/s) { | ||||
247 | |||||
248 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
249 | DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; | ||||
250 | push @{$paras->[-1]}, $line; | ||||
251 | } else { | ||||
252 | ++$self->{'pod_para_count'}; | ||||
253 | $self->_ponder_paragraph_buffer(); | ||||
254 | # by now it's safe to consider the previous paragraph as done. | ||||
255 | DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; | ||||
256 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; | ||||
257 | } | ||||
258 | } else { | ||||
259 | ++$self->{'pod_para_count'}; | ||||
260 | $self->_ponder_paragraph_buffer(); | ||||
261 | # by now it's safe to consider the previous paragraph as done. | ||||
262 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; | ||||
263 | DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; | ||||
264 | } | ||||
265 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
266 | |||||
267 | } else { | ||||
268 | # It's a non-blank line /continuing/ the current para | ||||
269 | if(@$paras) { | ||||
270 | DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; | ||||
271 | push @{$paras->[-1]}, $line; | ||||
272 | } else { | ||||
273 | # Unexpected case! | ||||
274 | die "Continuing a paragraph but \@\$paras is empty?"; | ||||
275 | } | ||||
276 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
277 | } | ||||
278 | |||||
279 | } # ends the big while loop | ||||
280 | |||||
281 | DEBUG > 1 and print(pretty(@$paras), "\n"); | ||||
282 | return $self; | ||||
283 | } | ||||
284 | |||||
285 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
286 | |||||
287 | sub _handle_encoding_line { | ||||
288 | my($self, $line) = @_; | ||||
289 | |||||
290 | return if $self->parse_characters; | ||||
291 | |||||
292 | # The point of this routine is to set $self->{'_transcoder'} as indicated. | ||||
293 | |||||
294 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; | ||||
295 | DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; | ||||
296 | |||||
297 | my $e = $1; | ||||
298 | my $orig = $e; | ||||
299 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; | ||||
300 | |||||
301 | my $enc_error; | ||||
302 | |||||
303 | # Cf. perldoc Encode and perldoc Encode::Supported | ||||
304 | |||||
305 | require Pod::Simple::Transcode; | ||||
306 | |||||
307 | if( $self->{'encoding'} ) { | ||||
308 | my $norm_current = $self->{'encoding'}; | ||||
309 | my $norm_e = $e; | ||||
310 | foreach my $that ($norm_current, $norm_e) { | ||||
311 | $that = lc($that); | ||||
312 | $that =~ s/[-_]//g; | ||||
313 | } | ||||
314 | if($norm_current eq $norm_e) { | ||||
315 | DEBUG > 1 and print "The '=encoding $orig' line is ", | ||||
316 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; | ||||
317 | $enc_error = ''; | ||||
318 | # But that doesn't necessarily mean that the earlier one went okay | ||||
319 | } else { | ||||
320 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; | ||||
321 | DEBUG > 1 and print $enc_error; | ||||
322 | } | ||||
323 | } elsif ( | ||||
324 | # OK, let's turn on the encoding | ||||
325 | do { | ||||
326 | DEBUG > 1 and print " Setting encoding to $e\n"; | ||||
327 | $self->{'encoding'} = $e; | ||||
328 | 1; | ||||
329 | } | ||||
330 | and $e eq 'HACKRAW' | ||||
331 | ) { | ||||
332 | DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; | ||||
333 | |||||
334 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { | ||||
335 | |||||
336 | die($enc_error = "WHAT? _transcoder is already set?!") | ||||
337 | if $self->{'_transcoder'}; # should never happen | ||||
338 | require Pod::Simple::Transcode; | ||||
339 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); | ||||
340 | eval { | ||||
341 | my @x = ('', "abc", "123"); | ||||
342 | $self->{'_transcoder'}->(@x); | ||||
343 | }; | ||||
344 | $@ && die( $enc_error = | ||||
345 | "Really unexpected error setting up encoding $e: $@\nAborting" | ||||
346 | ); | ||||
347 | $self->{'detected_encoding'} = $e; | ||||
348 | |||||
349 | } else { | ||||
350 | my @supported = Pod::Simple::Transcode::->all_encodings; | ||||
351 | |||||
352 | # Note unsupported, and complain | ||||
353 | DEBUG and print " Encoding [$e] is unsupported.", | ||||
354 | "\nSupporteds: @supported\n"; | ||||
355 | my $suggestion = ''; | ||||
356 | |||||
357 | # Look for a near match: | ||||
358 | my $norm = lc($e); | ||||
359 | $norm =~ tr[-_][]d; | ||||
360 | my $n; | ||||
361 | foreach my $enc (@supported) { | ||||
362 | $n = lc($enc); | ||||
363 | $n =~ tr[-_][]d; | ||||
364 | next unless $n eq $norm; | ||||
365 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; | ||||
366 | last; | ||||
367 | } | ||||
368 | my $encmodver = Pod::Simple::Transcode::->encmodver; | ||||
369 | $enc_error = join '' => | ||||
370 | "This document probably does not appear as it should, because its ", | ||||
371 | "\"=encoding $e\" line calls for an unsupported encoding.", | ||||
372 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" | ||||
373 | ; | ||||
374 | |||||
375 | $self->scream( $self->{'line_count'}, $enc_error ); | ||||
376 | } | ||||
377 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; | ||||
378 | if (defined($self->{'_processed_encoding'})) { | ||||
379 | # Should never happen | ||||
380 | die "Nested processed encoding."; | ||||
381 | } | ||||
382 | $self->{'_processed_encoding'} = $orig; | ||||
383 | |||||
384 | return $line; | ||||
385 | } | ||||
386 | |||||
387 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
388 | |||||
389 | sub _handle_encoding_second_level { | ||||
390 | # By time this is called, the encoding (if well formed) will already | ||||
391 | # have been acted one. | ||||
392 | my($self, $para) = @_; | ||||
393 | my @x = @$para; | ||||
394 | my $content = join ' ', splice @x, 2; | ||||
395 | $content =~ s/^\s+//s; | ||||
396 | $content =~ s/\s+$//s; | ||||
397 | |||||
398 | DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; | ||||
399 | |||||
400 | if (defined($self->{'_processed_encoding'})) { | ||||
401 | #if($content ne $self->{'_processed_encoding'}) { | ||||
402 | # Could it happen? | ||||
403 | #} | ||||
404 | delete $self->{'_processed_encoding'}; | ||||
405 | # It's already been handled. Check for errors. | ||||
406 | if(! $self->{'encoding_command_statuses'} ) { | ||||
407 | DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; | ||||
408 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { | ||||
409 | $self->whine( $para->[1]{'start_line'}, | ||||
410 | sprintf "Couldn't do %s: %s", | ||||
411 | $self->{'encoding_command_reqs' }[-1], | ||||
412 | $self->{'encoding_command_statuses'}[-1], | ||||
413 | ); | ||||
414 | } else { | ||||
415 | DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; | ||||
416 | } | ||||
417 | |||||
418 | } else { | ||||
419 | # Otherwise it's a syntax error | ||||
420 | $self->whine( $para->[1]{'start_line'}, | ||||
421 | "Invalid =encoding syntax: $content" | ||||
422 | ); | ||||
423 | } | ||||
424 | |||||
425 | return; | ||||
426 | } | ||||
427 | |||||
428 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` | ||||
429 | |||||
430 | { | ||||
431 | 2 | 1µs | my $m = -321; # magic line number | ||
432 | |||||
433 | sub _gen_errata { | ||||
434 | my $self = $_[0]; | ||||
435 | # Return 0 or more fake-o paragraphs explaining the accumulated | ||||
436 | # errors on this document. | ||||
437 | |||||
438 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; | ||||
439 | |||||
440 | my @out; | ||||
441 | |||||
442 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { | ||||
443 | push @out, | ||||
444 | ['=item', {'start_line' => $m}, "Around line $line:"], | ||||
445 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, | ||||
446 | #['~Top', {'start_line' => $m}, | ||||
447 | $_ | ||||
448 | #] | ||||
449 | ], | ||||
450 | @{$self->{'errata'}{$line}} | ||||
451 | ) | ||||
452 | ; | ||||
453 | } | ||||
454 | |||||
455 | # TODO: report of unknown entities? unrenderable characters? | ||||
456 | |||||
457 | unshift @out, | ||||
458 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], | ||||
459 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, | ||||
460 | "Hey! ", | ||||
461 | ['B', {}, | ||||
462 | 'The above document had some coding errors, which are explained below:' | ||||
463 | ] | ||||
464 | ], | ||||
465 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], | ||||
466 | ; | ||||
467 | |||||
468 | push @out, | ||||
469 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], | ||||
470 | ; | ||||
471 | |||||
472 | DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; | ||||
473 | |||||
474 | return @out; | ||||
475 | } | ||||
476 | |||||
477 | } | ||||
478 | |||||
479 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
480 | |||||
481 | ############################################################################## | ||||
482 | ## | ||||
483 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
484 | ## | ||||
485 | ## HERE IT BECOMES REALLY SCARY | ||||
486 | ## | ||||
487 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
488 | ## | ||||
489 | ############################################################################## | ||||
490 | |||||
491 | sub _ponder_paragraph_buffer { | ||||
492 | |||||
493 | # Para-token types as found in the buffer. | ||||
494 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, | ||||
495 | # =over, =back, =item | ||||
496 | # and the null =pod (to be complained about if over one line) | ||||
497 | # | ||||
498 | # "~data" paragraphs are something we generate at this level, depending on | ||||
499 | # a currently open =over region | ||||
500 | |||||
501 | # Events fired: Begin and end for: | ||||
502 | # directivename (like head1 .. head4), item, extend, | ||||
503 | # for (from =begin...=end, =for), | ||||
504 | # over-bullet, over-number, over-text, over-block, | ||||
505 | # item-bullet, item-number, item-text, | ||||
506 | # Document, | ||||
507 | # Data, Para, Verbatim | ||||
508 | # B, C, longdirname (TODO -- wha?), etc. for all directives | ||||
509 | # | ||||
510 | |||||
511 | my $self = $_[0]; | ||||
512 | my $paras; | ||||
513 | return unless @{$paras = $self->{'paras'}}; | ||||
514 | my $curr_open = ($self->{'curr_open'} ||= []); | ||||
515 | |||||
516 | my $scratch; | ||||
517 | |||||
518 | DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; | ||||
519 | |||||
520 | # We have something in our buffer. So apparently the document has started. | ||||
521 | unless($self->{'doc_has_started'}) { | ||||
522 | $self->{'doc_has_started'} = 1; | ||||
523 | |||||
524 | my $starting_contentless; | ||||
525 | $starting_contentless = | ||||
526 | ( | ||||
527 | !@$curr_open | ||||
528 | and @$paras and ! grep $_->[0] ne '~end', @$paras | ||||
529 | # i.e., if the paras is all ~ends | ||||
530 | ) | ||||
531 | ; | ||||
532 | DEBUG and print "# Starting ", | ||||
533 | $starting_contentless ? 'contentless' : 'contentful', | ||||
534 | " document\n" | ||||
535 | ; | ||||
536 | |||||
537 | $self->_handle_element_start( | ||||
538 | ($scratch = 'Document'), | ||||
539 | { | ||||
540 | 'start_line' => $paras->[0][1]{'start_line'}, | ||||
541 | $starting_contentless ? ( 'contentless' => 1 ) : (), | ||||
542 | }, | ||||
543 | ); | ||||
544 | } | ||||
545 | |||||
546 | my($para, $para_type); | ||||
547 | while(@$paras) { | ||||
548 | last if @$paras == 1 and | ||||
549 | ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' | ||||
550 | or $paras->[0][0] eq '=item' ) | ||||
551 | ; | ||||
552 | # Those're the three kinds of paragraphs that require lookahead. | ||||
553 | # Actually, an "=item Foo" inside an <over type=text> region | ||||
554 | # and any =item inside an <over type=block> region (rare) | ||||
555 | # don't require any lookahead, but all others (bullets | ||||
556 | # and numbers) do. | ||||
557 | |||||
558 | # TODO: whinge about many kinds of directives in non-resolving =for regions? | ||||
559 | # TODO: many? like what? =head1 etc? | ||||
560 | |||||
561 | $para = shift @$paras; | ||||
562 | $para_type = $para->[0]; | ||||
563 | |||||
564 | DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", | ||||
565 | $self->_dump_curr_open(), ")\n"; | ||||
566 | |||||
567 | if($para_type eq '=for') { | ||||
568 | next if $self->_ponder_for($para,$curr_open,$paras); | ||||
569 | |||||
570 | } elsif($para_type eq '=begin') { | ||||
571 | next if $self->_ponder_begin($para,$curr_open,$paras); | ||||
572 | |||||
573 | } elsif($para_type eq '=end') { | ||||
574 | next if $self->_ponder_end($para,$curr_open,$paras); | ||||
575 | |||||
576 | } elsif($para_type eq '~end') { # The virtual end-document signal | ||||
577 | next if $self->_ponder_doc_end($para,$curr_open,$paras); | ||||
578 | } | ||||
579 | |||||
580 | |||||
581 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
582 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
583 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
584 | DEBUG > 1 and | ||||
585 | print "Skipping $para_type paragraph because in ignore mode.\n"; | ||||
586 | next; | ||||
587 | } | ||||
588 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
589 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
590 | |||||
591 | if($para_type eq '=pod') { | ||||
592 | $self->_ponder_pod($para,$curr_open,$paras); | ||||
593 | |||||
594 | } elsif($para_type eq '=over') { | ||||
595 | next if $self->_ponder_over($para,$curr_open,$paras); | ||||
596 | |||||
597 | } elsif($para_type eq '=back') { | ||||
598 | next if $self->_ponder_back($para,$curr_open,$paras); | ||||
599 | |||||
600 | } else { | ||||
601 | |||||
602 | # All non-magical codes!!! | ||||
603 | |||||
604 | # Here we start using $para_type for our own twisted purposes, to | ||||
605 | # mean how it should get treated, not as what the element name | ||||
606 | # should be. | ||||
607 | |||||
608 | DEBUG > 1 and print "Pondering non-magical $para_type\n"; | ||||
609 | |||||
610 | my $i; | ||||
611 | |||||
612 | # Enforce some =headN discipline | ||||
613 | if($para_type =~ m/^=head\d$/s | ||||
614 | and ! $self->{'accept_heads_anywhere'} | ||||
615 | and @$curr_open | ||||
616 | and $curr_open->[-1][0] eq '=over' | ||||
617 | ) { | ||||
618 | DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; | ||||
619 | $self->whine( | ||||
620 | $para->[1]{'start_line'}, | ||||
621 | "You forgot a '=back' before '$para_type'" | ||||
622 | ); | ||||
623 | unshift @$paras, ['=back', {}, ''], $para; # close the =over | ||||
624 | next; | ||||
625 | } | ||||
626 | |||||
627 | |||||
628 | if($para_type eq '=item') { | ||||
629 | |||||
630 | my $over; | ||||
631 | unless(@$curr_open and | ||||
632 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
633 | $self->whine( | ||||
634 | $para->[1]{'start_line'}, | ||||
635 | "'=item' outside of any '=over'" | ||||
636 | ); | ||||
637 | unshift @$paras, | ||||
638 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
639 | $para | ||||
640 | ; | ||||
641 | next; | ||||
642 | } | ||||
643 | |||||
644 | |||||
645 | my $over_type = $over->[1]{'~type'}; | ||||
646 | |||||
647 | if(!$over_type) { | ||||
648 | # Shouldn't happen1 | ||||
649 | die "Typeless over in stack, starting at line " | ||||
650 | . $over->[1]{'start_line'}; | ||||
651 | |||||
652 | } elsif($over_type eq 'block') { | ||||
653 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
654 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
655 | $self->whine( | ||||
656 | $curr_open->[-1][1]{'start_line'}, | ||||
657 | "You can't have =items (as at line " | ||||
658 | . $para->[1]{'start_line'} | ||||
659 | . ") unless the first thing after the =over is an =item" | ||||
660 | ); | ||||
661 | } | ||||
662 | # Just turn it into a paragraph and reconsider it | ||||
663 | $para->[0] = '~Para'; | ||||
664 | unshift @$paras, $para; | ||||
665 | next; | ||||
666 | |||||
667 | } elsif($over_type eq 'text') { | ||||
668 | my $item_type = $self->_get_item_type($para); | ||||
669 | # That kills the content of the item if it's a number or bullet. | ||||
670 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
671 | |||||
672 | if($item_type eq 'text') { | ||||
673 | # Nothing special needs doing for 'text' | ||||
674 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
675 | $self->whine( | ||||
676 | $para->[1]{'start_line'}, | ||||
677 | "Expected text after =item, not a $item_type" | ||||
678 | ); | ||||
679 | # Undo our clobbering: | ||||
680 | push @$para, $para->[1]{'~orig_content'}; | ||||
681 | delete $para->[1]{'number'}; | ||||
682 | # Only a PROPER item-number element is allowed | ||||
683 | # to have a number attribute. | ||||
684 | } else { | ||||
685 | die "Unhandled item type $item_type"; # should never happen | ||||
686 | } | ||||
687 | |||||
688 | # =item-text thingies don't need any assimilation, it seems. | ||||
689 | |||||
690 | } elsif($over_type eq 'number') { | ||||
691 | my $item_type = $self->_get_item_type($para); | ||||
692 | # That kills the content of the item if it's a number or bullet. | ||||
693 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
694 | |||||
695 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
696 | |||||
697 | if($item_type eq 'bullet') { | ||||
698 | # Hm, it's not numeric. Correct for this. | ||||
699 | $para->[1]{'number'} = $expected_value; | ||||
700 | $self->whine( | ||||
701 | $para->[1]{'start_line'}, | ||||
702 | "Expected '=item $expected_value'" | ||||
703 | ); | ||||
704 | push @$para, $para->[1]{'~orig_content'}; | ||||
705 | # restore the bullet, blocking the assimilation of next para | ||||
706 | |||||
707 | } elsif($item_type eq 'text') { | ||||
708 | # Hm, it's not numeric. Correct for this. | ||||
709 | $para->[1]{'number'} = $expected_value; | ||||
710 | $self->whine( | ||||
711 | $para->[1]{'start_line'}, | ||||
712 | "Expected '=item $expected_value'" | ||||
713 | ); | ||||
714 | # Text content will still be there and will block next ~Para | ||||
715 | |||||
716 | } elsif($item_type ne 'number') { | ||||
717 | die "Unknown item type $item_type"; # should never happen | ||||
718 | |||||
719 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
720 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
721 | |||||
722 | } else { | ||||
723 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
724 | " instead of the expected value of $expected_value\n"; | ||||
725 | $self->whine( | ||||
726 | $para->[1]{'start_line'}, | ||||
727 | "You have '=item " . $para->[1]{'number'} . | ||||
728 | "' instead of the expected '=item $expected_value'" | ||||
729 | ); | ||||
730 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
731 | } | ||||
732 | |||||
733 | if(@$para == 2) { | ||||
734 | # For the cases where we /didn't/ push to @$para | ||||
735 | if($paras->[0][0] eq '~Para') { | ||||
736 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
737 | push @$para, splice @{shift @$paras},2; | ||||
738 | } else { | ||||
739 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
740 | push @$para, ''; # Just so it's not contentless | ||||
741 | } | ||||
742 | } | ||||
743 | |||||
744 | |||||
745 | } elsif($over_type eq 'bullet') { | ||||
746 | my $item_type = $self->_get_item_type($para); | ||||
747 | # That kills the content of the item if it's a number or bullet. | ||||
748 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
749 | |||||
750 | if($item_type eq 'bullet') { | ||||
751 | # as expected! | ||||
752 | |||||
753 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
754 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
755 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
756 | } | ||||
757 | |||||
758 | } elsif($item_type eq 'number') { | ||||
759 | $self->whine( | ||||
760 | $para->[1]{'start_line'}, | ||||
761 | "Expected '=item *'" | ||||
762 | ); | ||||
763 | push @$para, $para->[1]{'~orig_content'}; | ||||
764 | # and block assimilation of the next paragraph | ||||
765 | delete $para->[1]{'number'}; | ||||
766 | # Only a PROPER item-number element is allowed | ||||
767 | # to have a number attribute. | ||||
768 | } elsif($item_type eq 'text') { | ||||
769 | $self->whine( | ||||
770 | $para->[1]{'start_line'}, | ||||
771 | "Expected '=item *'" | ||||
772 | ); | ||||
773 | # But doesn't need processing. But it'll block assimilation | ||||
774 | # of the next para. | ||||
775 | } else { | ||||
776 | die "Unhandled item type $item_type"; # should never happen | ||||
777 | } | ||||
778 | |||||
779 | if(@$para == 2) { | ||||
780 | # For the cases where we /didn't/ push to @$para | ||||
781 | if($paras->[0][0] eq '~Para') { | ||||
782 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
783 | push @$para, splice @{shift @$paras},2; | ||||
784 | } else { | ||||
785 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
786 | push @$para, ''; # Just so it's not contentless | ||||
787 | } | ||||
788 | } | ||||
789 | |||||
790 | } else { | ||||
791 | die "Unhandled =over type \"$over_type\"?"; | ||||
792 | # Shouldn't happen! | ||||
793 | } | ||||
794 | |||||
795 | $para_type = 'Plain'; | ||||
796 | $para->[0] .= '-' . $over_type; | ||||
797 | # Whew. Now fall thru and process it. | ||||
798 | |||||
799 | |||||
800 | } elsif($para_type eq '=extend') { | ||||
801 | # Well, might as well implement it here. | ||||
802 | $self->_ponder_extend($para); | ||||
803 | next; # and skip | ||||
804 | } elsif($para_type eq '=encoding') { | ||||
805 | # Not actually acted on here, but we catch errors here. | ||||
806 | $self->_handle_encoding_second_level($para); | ||||
807 | next unless $self->keep_encoding_directive; | ||||
808 | $para_type = 'Plain'; | ||||
809 | } elsif($para_type eq '~Verbatim') { | ||||
810 | $para->[0] = 'Verbatim'; | ||||
811 | $para_type = '?Verbatim'; | ||||
812 | } elsif($para_type eq '~Para') { | ||||
813 | $para->[0] = 'Para'; | ||||
814 | $para_type = '?Plain'; | ||||
815 | } elsif($para_type eq 'Data') { | ||||
816 | $para->[0] = 'Data'; | ||||
817 | $para_type = '?Data'; | ||||
818 | } elsif( $para_type =~ s/^=//s | ||||
819 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) | ||||
820 | ) { | ||||
821 | DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; | ||||
822 | } else { | ||||
823 | # An unknown directive! | ||||
824 | DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", | ||||
825 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) | ||||
826 | ; | ||||
827 | $self->whine( | ||||
828 | $para->[1]{'start_line'}, | ||||
829 | "Unknown directive: $para->[0]" | ||||
830 | ); | ||||
831 | |||||
832 | # And maybe treat it as text instead of just letting it go? | ||||
833 | next; | ||||
834 | } | ||||
835 | |||||
836 | if($para_type =~ s/^\?//s) { | ||||
837 | if(! @$curr_open) { # usual case | ||||
838 | DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; | ||||
839 | } else { | ||||
840 | my @fors = grep $_->[0] eq '=for', @$curr_open; | ||||
841 | DEBUG > 1 and print "Containing fors: ", | ||||
842 | join(',', map $_->[1]{'target'}, @fors), "\n"; | ||||
843 | |||||
844 | if(! @fors) { | ||||
845 | DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; | ||||
846 | |||||
847 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { | ||||
848 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { | ||||
849 | } elsif( $fors[-1][1]{'~resolve'} ) { | ||||
850 | # Look to the immediately containing for | ||||
851 | |||||
852 | if($para_type eq 'Data') { | ||||
853 | DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
854 | $para->[0] = 'Para'; | ||||
855 | $para_type = 'Plain'; | ||||
856 | } else { | ||||
857 | DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
858 | } | ||||
859 | } else { | ||||
860 | DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; | ||||
861 | $para->[0] = $para_type = 'Data'; | ||||
862 | } | ||||
863 | } | ||||
864 | } | ||||
865 | |||||
866 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
867 | if($para_type eq 'Plain') { | ||||
868 | $self->_ponder_Plain($para); | ||||
869 | } elsif($para_type eq 'Verbatim') { | ||||
870 | $self->_ponder_Verbatim($para); | ||||
871 | } elsif($para_type eq 'Data') { | ||||
872 | $self->_ponder_Data($para); | ||||
873 | } else { | ||||
874 | die "\$para type is $para_type -- how did that happen?"; | ||||
875 | # Shouldn't happen. | ||||
876 | } | ||||
877 | |||||
878 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
879 | $para->[0] =~ s/^[~=]//s; | ||||
880 | |||||
881 | DEBUG and print "\n", pretty($para), "\n"; | ||||
882 | |||||
883 | # traverse the treelet (which might well be just one string scalar) | ||||
884 | $self->{'content_seen'} ||= 1; | ||||
885 | $self->_traverse_treelet_bit(@$para); | ||||
886 | } | ||||
887 | } | ||||
888 | |||||
889 | return; | ||||
890 | } | ||||
891 | |||||
892 | ########################################################################### | ||||
893 | # The sub-ponderers... | ||||
894 | |||||
- - | |||||
897 | sub _ponder_for { | ||||
898 | my ($self,$para,$curr_open,$paras) = @_; | ||||
899 | |||||
900 | # Fake it out as a begin/end | ||||
901 | my $target; | ||||
902 | |||||
903 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
904 | DEBUG > 1 and print "Ignoring ignorable =for\n"; | ||||
905 | return 1; | ||||
906 | } | ||||
907 | |||||
908 | for(my $i = 2; $i < @$para; ++$i) { | ||||
909 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { | ||||
910 | $target = $1; | ||||
911 | last; | ||||
912 | } | ||||
913 | } | ||||
914 | unless(defined $target) { | ||||
915 | $self->whine( | ||||
916 | $para->[1]{'start_line'}, | ||||
917 | "=for without a target?" | ||||
918 | ); | ||||
919 | return 1; | ||||
920 | } | ||||
921 | DEBUG > 1 and | ||||
922 | print "Faking out a =for $target as a =begin $target / =end $target\n"; | ||||
923 | |||||
924 | $para->[0] = 'Data'; | ||||
925 | |||||
926 | unshift @$paras, | ||||
927 | ['=begin', | ||||
928 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
929 | $target, | ||||
930 | ], | ||||
931 | $para, | ||||
932 | ['=end', | ||||
933 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
934 | $target, | ||||
935 | ], | ||||
936 | ; | ||||
937 | |||||
938 | return 1; | ||||
939 | } | ||||
940 | |||||
941 | sub _ponder_begin { | ||||
942 | my ($self,$para,$curr_open,$paras) = @_; | ||||
943 | my $content = join ' ', splice @$para, 2; | ||||
944 | $content =~ s/^\s+//s; | ||||
945 | $content =~ s/\s+$//s; | ||||
946 | unless(length($content)) { | ||||
947 | $self->whine( | ||||
948 | $para->[1]{'start_line'}, | ||||
949 | "=begin without a target?" | ||||
950 | ); | ||||
951 | DEBUG and print "Ignoring targetless =begin\n"; | ||||
952 | return 1; | ||||
953 | } | ||||
954 | |||||
955 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; | ||||
956 | $para->[1]{'title'} = $title if ($title); | ||||
957 | $para->[1]{'target'} = $target; # without any ':' | ||||
958 | $content = $target; # strip off the title | ||||
959 | |||||
960 | $content =~ s/^:!/!:/s; | ||||
961 | my $neg; # whether this is a negation-match | ||||
962 | $neg = 1 if $content =~ s/^!//s; | ||||
963 | my $to_resolve; # whether to process formatting codes | ||||
964 | $to_resolve = 1 if $content =~ s/^://s; | ||||
965 | |||||
966 | my $dont_ignore; # whether this target matches us | ||||
967 | |||||
968 | foreach my $target_name ( | ||||
969 | split(',', $content, -1), | ||||
970 | $neg ? () : '*' | ||||
971 | ) { | ||||
972 | DEBUG > 2 and | ||||
973 | print " Considering whether =begin $content matches $target_name\n"; | ||||
974 | next unless $self->{'accept_targets'}{$target_name}; | ||||
975 | |||||
976 | DEBUG > 2 and | ||||
977 | print " It DOES match the acceptable target $target_name!\n"; | ||||
978 | $to_resolve = 1 | ||||
979 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; | ||||
980 | $dont_ignore = 1; | ||||
981 | $para->[1]{'target_matching'} = $target_name; | ||||
982 | last; # stop looking at other target names | ||||
983 | } | ||||
984 | |||||
985 | if($neg) { | ||||
986 | if( $dont_ignore ) { | ||||
987 | $dont_ignore = ''; | ||||
988 | delete $para->[1]{'target_matching'}; | ||||
989 | DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; | ||||
990 | } else { | ||||
991 | $dont_ignore = 1; | ||||
992 | $para->[1]{'target_matching'} = '!'; | ||||
993 | DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; | ||||
994 | } | ||||
995 | } | ||||
996 | |||||
997 | $para->[0] = '=for'; # Just what we happen to call these, internally | ||||
998 | $para->[1]{'~really'} ||= '=begin'; | ||||
999 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; | ||||
1000 | $para->[1]{'~resolve'} = $to_resolve || 0; | ||||
1001 | |||||
1002 | DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', | ||||
1003 | "ignore contents of this region\n"; | ||||
1004 | DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", | ||||
1005 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; | ||||
1006 | DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; | ||||
1007 | |||||
1008 | push @$curr_open, $para; | ||||
1009 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1010 | DEBUG > 1 and print "Ignoring ignorable =begin\n"; | ||||
1011 | } else { | ||||
1012 | $self->{'content_seen'} ||= 1; | ||||
1013 | $self->_handle_element_start((my $scratch='for'), $para->[1]); | ||||
1014 | } | ||||
1015 | |||||
1016 | return 1; | ||||
1017 | } | ||||
1018 | |||||
1019 | sub _ponder_end { | ||||
1020 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1021 | my $content = join ' ', splice @$para, 2; | ||||
1022 | $content =~ s/^\s+//s; | ||||
1023 | $content =~ s/\s+$//s; | ||||
1024 | DEBUG and print "Ogling '=end $content' directive\n"; | ||||
1025 | |||||
1026 | unless(length($content)) { | ||||
1027 | $self->whine( | ||||
1028 | $para->[1]{'start_line'}, | ||||
1029 | "'=end' without a target?" . ( | ||||
1030 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) | ||||
1031 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) | ||||
1032 | : '' | ||||
1033 | ) | ||||
1034 | ); | ||||
1035 | DEBUG and print "Ignoring targetless =end\n"; | ||||
1036 | return 1; | ||||
1037 | } | ||||
1038 | |||||
1039 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word | ||||
1040 | $self->whine( | ||||
1041 | $para->[1]{'start_line'}, | ||||
1042 | "'=end $content' is invalid. (Stack: " | ||||
1043 | . $self->_dump_curr_open() . ')' | ||||
1044 | ); | ||||
1045 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
1046 | return 1; | ||||
1047 | } | ||||
1048 | |||||
1049 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { | ||||
1050 | $self->whine( | ||||
1051 | $para->[1]{'start_line'}, | ||||
1052 | "=end $content without matching =begin. (Stack: " | ||||
1053 | . $self->_dump_curr_open() . ')' | ||||
1054 | ); | ||||
1055 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
1056 | return 1; | ||||
1057 | } | ||||
1058 | |||||
1059 | unless($content eq $curr_open->[-1][1]{'target'}) { | ||||
1060 | $self->whine( | ||||
1061 | $para->[1]{'start_line'}, | ||||
1062 | "=end $content doesn't match =begin " | ||||
1063 | . $curr_open->[-1][1]{'target'} | ||||
1064 | . ". (Stack: " | ||||
1065 | . $self->_dump_curr_open() . ')' | ||||
1066 | ); | ||||
1067 | DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; | ||||
1068 | return 1; | ||||
1069 | } | ||||
1070 | |||||
1071 | # Else it's okay to close... | ||||
1072 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1073 | DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; | ||||
1074 | # And that may be because of this to-be-closed =for region, or some | ||||
1075 | # other one, but it doesn't matter. | ||||
1076 | } else { | ||||
1077 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; | ||||
1078 | # what's that for? | ||||
1079 | |||||
1080 | $self->{'content_seen'} ||= 1; | ||||
1081 | $self->_handle_element_end( my $scratch = 'for', $para->[1]); | ||||
1082 | } | ||||
1083 | DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; | ||||
1084 | pop @$curr_open; | ||||
1085 | |||||
1086 | return 1; | ||||
1087 | } | ||||
1088 | |||||
1089 | sub _ponder_doc_end { | ||||
1090 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1091 | if(@$curr_open) { # Deal with things left open | ||||
1092 | DEBUG and print "Stack is nonempty at end-document: (", | ||||
1093 | $self->_dump_curr_open(), ")\n"; | ||||
1094 | |||||
1095 | DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; | ||||
1096 | unshift @$paras, $self->_closers_for_all_curr_open; | ||||
1097 | # Make sure there is exactly one ~end in the parastack, at the end: | ||||
1098 | @$paras = grep $_->[0] ne '~end', @$paras; | ||||
1099 | push @$paras, $para, $para; | ||||
1100 | # We need two -- once for the next cycle where we | ||||
1101 | # generate errata, and then another to be at the end | ||||
1102 | # when that loop back around to process the errata. | ||||
1103 | return 1; | ||||
1104 | |||||
1105 | } else { | ||||
1106 | DEBUG and print "Okay, stack is empty now.\n"; | ||||
1107 | } | ||||
1108 | |||||
1109 | # Try generating errata section, if applicable | ||||
1110 | unless($self->{'~tried_gen_errata'}) { | ||||
1111 | $self->{'~tried_gen_errata'} = 1; | ||||
1112 | my @extras = $self->_gen_errata(); | ||||
1113 | if(@extras) { | ||||
1114 | unshift @$paras, @extras; | ||||
1115 | DEBUG and print "Generated errata... relooping...\n"; | ||||
1116 | return 1; # I.e., loop around again to process these fake-o paragraphs | ||||
1117 | } | ||||
1118 | } | ||||
1119 | |||||
1120 | splice @$paras; # Well, that's that for this paragraph buffer. | ||||
1121 | DEBUG and print "Throwing end-document event.\n"; | ||||
1122 | |||||
1123 | $self->_handle_element_end( my $scratch = 'Document' ); | ||||
1124 | return 1; # Hasta la byebye | ||||
1125 | } | ||||
1126 | |||||
1127 | sub _ponder_pod { | ||||
1128 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1129 | $self->whine( | ||||
1130 | $para->[1]{'start_line'}, | ||||
1131 | "=pod directives shouldn't be over one line long! Ignoring all " | ||||
1132 | . (@$para - 2) . " lines of content" | ||||
1133 | ) if @$para > 3; | ||||
1134 | |||||
1135 | # Content ignored unless 'pod_handler' is set | ||||
1136 | if (my $pod_handler = $self->{'pod_handler'}) { | ||||
1137 | my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; | ||||
1138 | $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output | ||||
1139 | $pod_handler->($line, $line_num, $self); | ||||
1140 | } | ||||
1141 | |||||
1142 | # The surrounding methods set content_seen, so let us remain consistent. | ||||
1143 | # I do not know why it was not here before -- should it not be here? | ||||
1144 | # $self->{'content_seen'} ||= 1; | ||||
1145 | |||||
1146 | return; | ||||
1147 | } | ||||
1148 | |||||
1149 | sub _ponder_over { | ||||
1150 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1151 | return 1 unless @$paras; | ||||
1152 | my $list_type; | ||||
1153 | |||||
1154 | if($paras->[0][0] eq '=item') { # most common case | ||||
1155 | $list_type = $self->_get_initial_item_type($paras->[0]); | ||||
1156 | |||||
1157 | } elsif($paras->[0][0] eq '=back') { | ||||
1158 | # Ignore empty lists by default | ||||
1159 | if ($self->{'parse_empty_lists'}) { | ||||
1160 | $list_type = 'empty'; | ||||
1161 | } else { | ||||
1162 | shift @$paras; | ||||
1163 | return 1; | ||||
1164 | } | ||||
1165 | } elsif($paras->[0][0] eq '~end') { | ||||
1166 | $self->whine( | ||||
1167 | $para->[1]{'start_line'}, | ||||
1168 | "=over is the last thing in the document?!" | ||||
1169 | ); | ||||
1170 | return 1; # But feh, ignore it. | ||||
1171 | } else { | ||||
1172 | $list_type = 'block'; | ||||
1173 | } | ||||
1174 | $para->[1]{'~type'} = $list_type; | ||||
1175 | push @$curr_open, $para; | ||||
1176 | # yes, we reuse the paragraph as a stack item | ||||
1177 | |||||
1178 | my $content = join ' ', splice @$para, 2; | ||||
1179 | my $overness; | ||||
1180 | if($content =~ m/^\s*$/s) { | ||||
1181 | $para->[1]{'indent'} = 4; | ||||
1182 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { | ||||
1183 | 2 | 3.26ms | 2 | 16µs | # spent 14µs (11+2) within Pod::Simple::BlackBox::BEGIN@1183 which was called:
# once (11µs+2µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1183 # spent 14µs making 1 call to Pod::Simple::BlackBox::BEGIN@1183
# spent 2µs making 1 call to integer::unimport |
1184 | $para->[1]{'indent'} = $1; | ||||
1185 | if($1 == 0) { | ||||
1186 | $self->whine( | ||||
1187 | $para->[1]{'start_line'}, | ||||
1188 | "Can't have a 0 in =over $content" | ||||
1189 | ); | ||||
1190 | $para->[1]{'indent'} = 4; | ||||
1191 | } | ||||
1192 | } else { | ||||
1193 | $self->whine( | ||||
1194 | $para->[1]{'start_line'}, | ||||
1195 | "=over should be: '=over' or '=over positive_number'" | ||||
1196 | ); | ||||
1197 | $para->[1]{'indent'} = 4; | ||||
1198 | } | ||||
1199 | DEBUG > 1 and print "=over found of type $list_type\n"; | ||||
1200 | |||||
1201 | $self->{'content_seen'} ||= 1; | ||||
1202 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); | ||||
1203 | |||||
1204 | return; | ||||
1205 | } | ||||
1206 | |||||
1207 | sub _ponder_back { | ||||
1208 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1209 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? | ||||
1210 | |||||
1211 | my $content = join ' ', splice @$para, 2; | ||||
1212 | if($content =~ m/\S/) { | ||||
1213 | $self->whine( | ||||
1214 | $para->[1]{'start_line'}, | ||||
1215 | "=back doesn't take any parameters, but you said =back $content" | ||||
1216 | ); | ||||
1217 | } | ||||
1218 | |||||
1219 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { | ||||
1220 | DEBUG > 1 and print "=back happily closes matching =over\n"; | ||||
1221 | # Expected case: we're closing the most recently opened thing | ||||
1222 | #my $over = pop @$curr_open; | ||||
1223 | $self->{'content_seen'} ||= 1; | ||||
1224 | $self->_handle_element_end( my $scratch = | ||||
1225 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] | ||||
1226 | ); | ||||
1227 | } else { | ||||
1228 | DEBUG > 1 and print "=back found without a matching =over. Stack: (", | ||||
1229 | join(', ', map $_->[0], @$curr_open), ").\n"; | ||||
1230 | $self->whine( | ||||
1231 | $para->[1]{'start_line'}, | ||||
1232 | '=back without =over' | ||||
1233 | ); | ||||
1234 | return 1; # and ignore it | ||||
1235 | } | ||||
1236 | } | ||||
1237 | |||||
1238 | sub _ponder_item { | ||||
1239 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1240 | my $over; | ||||
1241 | unless(@$curr_open and | ||||
1242 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
1243 | $self->whine( | ||||
1244 | $para->[1]{'start_line'}, | ||||
1245 | "'=item' outside of any '=over'" | ||||
1246 | ); | ||||
1247 | unshift @$paras, | ||||
1248 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
1249 | $para | ||||
1250 | ; | ||||
1251 | return 1; | ||||
1252 | } | ||||
1253 | |||||
1254 | |||||
1255 | my $over_type = $over->[1]{'~type'}; | ||||
1256 | |||||
1257 | if(!$over_type) { | ||||
1258 | # Shouldn't happen1 | ||||
1259 | die "Typeless over in stack, starting at line " | ||||
1260 | . $over->[1]{'start_line'}; | ||||
1261 | |||||
1262 | } elsif($over_type eq 'block') { | ||||
1263 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
1264 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
1265 | $self->whine( | ||||
1266 | $curr_open->[-1][1]{'start_line'}, | ||||
1267 | "You can't have =items (as at line " | ||||
1268 | . $para->[1]{'start_line'} | ||||
1269 | . ") unless the first thing after the =over is an =item" | ||||
1270 | ); | ||||
1271 | } | ||||
1272 | # Just turn it into a paragraph and reconsider it | ||||
1273 | $para->[0] = '~Para'; | ||||
1274 | unshift @$paras, $para; | ||||
1275 | return 1; | ||||
1276 | |||||
1277 | } elsif($over_type eq 'text') { | ||||
1278 | my $item_type = $self->_get_item_type($para); | ||||
1279 | # That kills the content of the item if it's a number or bullet. | ||||
1280 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1281 | |||||
1282 | if($item_type eq 'text') { | ||||
1283 | # Nothing special needs doing for 'text' | ||||
1284 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
1285 | $self->whine( | ||||
1286 | $para->[1]{'start_line'}, | ||||
1287 | "Expected text after =item, not a $item_type" | ||||
1288 | ); | ||||
1289 | # Undo our clobbering: | ||||
1290 | push @$para, $para->[1]{'~orig_content'}; | ||||
1291 | delete $para->[1]{'number'}; | ||||
1292 | # Only a PROPER item-number element is allowed | ||||
1293 | # to have a number attribute. | ||||
1294 | } else { | ||||
1295 | die "Unhandled item type $item_type"; # should never happen | ||||
1296 | } | ||||
1297 | |||||
1298 | # =item-text thingies don't need any assimilation, it seems. | ||||
1299 | |||||
1300 | } elsif($over_type eq 'number') { | ||||
1301 | my $item_type = $self->_get_item_type($para); | ||||
1302 | # That kills the content of the item if it's a number or bullet. | ||||
1303 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1304 | |||||
1305 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
1306 | |||||
1307 | if($item_type eq 'bullet') { | ||||
1308 | # Hm, it's not numeric. Correct for this. | ||||
1309 | $para->[1]{'number'} = $expected_value; | ||||
1310 | $self->whine( | ||||
1311 | $para->[1]{'start_line'}, | ||||
1312 | "Expected '=item $expected_value'" | ||||
1313 | ); | ||||
1314 | push @$para, $para->[1]{'~orig_content'}; | ||||
1315 | # restore the bullet, blocking the assimilation of next para | ||||
1316 | |||||
1317 | } elsif($item_type eq 'text') { | ||||
1318 | # Hm, it's not numeric. Correct for this. | ||||
1319 | $para->[1]{'number'} = $expected_value; | ||||
1320 | $self->whine( | ||||
1321 | $para->[1]{'start_line'}, | ||||
1322 | "Expected '=item $expected_value'" | ||||
1323 | ); | ||||
1324 | # Text content will still be there and will block next ~Para | ||||
1325 | |||||
1326 | } elsif($item_type ne 'number') { | ||||
1327 | die "Unknown item type $item_type"; # should never happen | ||||
1328 | |||||
1329 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
1330 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
1331 | |||||
1332 | } else { | ||||
1333 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
1334 | " instead of the expected value of $expected_value\n"; | ||||
1335 | $self->whine( | ||||
1336 | $para->[1]{'start_line'}, | ||||
1337 | "You have '=item " . $para->[1]{'number'} . | ||||
1338 | "' instead of the expected '=item $expected_value'" | ||||
1339 | ); | ||||
1340 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
1341 | } | ||||
1342 | |||||
1343 | if(@$para == 2) { | ||||
1344 | # For the cases where we /didn't/ push to @$para | ||||
1345 | if($paras->[0][0] eq '~Para') { | ||||
1346 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
1347 | push @$para, splice @{shift @$paras},2; | ||||
1348 | } else { | ||||
1349 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1350 | push @$para, ''; # Just so it's not contentless | ||||
1351 | } | ||||
1352 | } | ||||
1353 | |||||
1354 | |||||
1355 | } elsif($over_type eq 'bullet') { | ||||
1356 | my $item_type = $self->_get_item_type($para); | ||||
1357 | # That kills the content of the item if it's a number or bullet. | ||||
1358 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1359 | |||||
1360 | if($item_type eq 'bullet') { | ||||
1361 | # as expected! | ||||
1362 | |||||
1363 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
1364 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
1365 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
1366 | } | ||||
1367 | |||||
1368 | } elsif($item_type eq 'number') { | ||||
1369 | $self->whine( | ||||
1370 | $para->[1]{'start_line'}, | ||||
1371 | "Expected '=item *'" | ||||
1372 | ); | ||||
1373 | push @$para, $para->[1]{'~orig_content'}; | ||||
1374 | # and block assimilation of the next paragraph | ||||
1375 | delete $para->[1]{'number'}; | ||||
1376 | # Only a PROPER item-number element is allowed | ||||
1377 | # to have a number attribute. | ||||
1378 | } elsif($item_type eq 'text') { | ||||
1379 | $self->whine( | ||||
1380 | $para->[1]{'start_line'}, | ||||
1381 | "Expected '=item *'" | ||||
1382 | ); | ||||
1383 | # But doesn't need processing. But it'll block assimilation | ||||
1384 | # of the next para. | ||||
1385 | } else { | ||||
1386 | die "Unhandled item type $item_type"; # should never happen | ||||
1387 | } | ||||
1388 | |||||
1389 | if(@$para == 2) { | ||||
1390 | # For the cases where we /didn't/ push to @$para | ||||
1391 | if($paras->[0][0] eq '~Para') { | ||||
1392 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
1393 | push @$para, splice @{shift @$paras},2; | ||||
1394 | } else { | ||||
1395 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1396 | push @$para, ''; # Just so it's not contentless | ||||
1397 | } | ||||
1398 | } | ||||
1399 | |||||
1400 | } else { | ||||
1401 | die "Unhandled =over type \"$over_type\"?"; | ||||
1402 | # Shouldn't happen! | ||||
1403 | } | ||||
1404 | $para->[0] .= '-' . $over_type; | ||||
1405 | |||||
1406 | return; | ||||
1407 | } | ||||
1408 | |||||
1409 | sub _ponder_Plain { | ||||
1410 | my ($self,$para) = @_; | ||||
1411 | DEBUG and print " giving plain treatment...\n"; | ||||
1412 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) | ||||
1413 | or $para->[1]{'~cooked'} | ||||
1414 | ) { | ||||
1415 | push @$para, | ||||
1416 | @{$self->_make_treelet( | ||||
1417 | join("\n", splice(@$para, 2)), | ||||
1418 | $para->[1]{'start_line'} | ||||
1419 | )}; | ||||
1420 | } | ||||
1421 | # Empty paragraphs don't need a treelet for any reason I can see. | ||||
1422 | # And precooked paragraphs already have a treelet. | ||||
1423 | return; | ||||
1424 | } | ||||
1425 | |||||
1426 | sub _ponder_Verbatim { | ||||
1427 | my ($self,$para) = @_; | ||||
1428 | DEBUG and print " giving verbatim treatment...\n"; | ||||
1429 | |||||
1430 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1431 | |||||
1432 | my $indent = $self->strip_verbatim_indent; | ||||
1433 | if ($indent && ref $indent eq 'CODE') { | ||||
1434 | my @shifted = (shift @{$para}, shift @{$para}); | ||||
1435 | $indent = $indent->($para); | ||||
1436 | unshift @{$para}, @shifted; | ||||
1437 | } | ||||
1438 | |||||
1439 | for(my $i = 2; $i < @$para; $i++) { | ||||
1440 | foreach my $line ($para->[$i]) { # just for aliasing | ||||
1441 | # Strip indentation. | ||||
1442 | $line =~ s/^\Q$indent// if $indent | ||||
1443 | && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); | ||||
1444 | while( $line =~ | ||||
1445 | # Sort of adapted from Text::Tabs -- yes, it's hardwired in that | ||||
1446 | # tabs are at every EIGHTH column. For portability, it has to be | ||||
1447 | # one setting everywhere, and 8th wins. | ||||
1448 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e | ||||
1449 | ) {} | ||||
1450 | |||||
1451 | # TODO: whinge about (or otherwise treat) unindented or overlong lines | ||||
1452 | |||||
1453 | } | ||||
1454 | } | ||||
1455 | |||||
1456 | # Now the VerbatimFormatted hoodoo... | ||||
1457 | if( $self->{'accept_codes'} and | ||||
1458 | $self->{'accept_codes'}{'VerbatimFormatted'} | ||||
1459 | ) { | ||||
1460 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } | ||||
1461 | # Kill any number of terminal newlines | ||||
1462 | $self->_verbatim_format($para); | ||||
1463 | } elsif ($self->{'codes_in_verbatim'}) { | ||||
1464 | push @$para, | ||||
1465 | @{$self->_make_treelet( | ||||
1466 | join("\n", splice(@$para, 2)), | ||||
1467 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} | ||||
1468 | )}; | ||||
1469 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1470 | } else { | ||||
1471 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1472 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1473 | } | ||||
1474 | return; | ||||
1475 | } | ||||
1476 | |||||
1477 | sub _ponder_Data { | ||||
1478 | my ($self,$para) = @_; | ||||
1479 | DEBUG and print " giving data treatment...\n"; | ||||
1480 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1481 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1482 | return; | ||||
1483 | } | ||||
1484 | |||||
- - | |||||
1488 | ########################################################################### | ||||
1489 | |||||
1490 | sub _traverse_treelet_bit { # for use only by the routine above | ||||
1491 | my($self, $name) = splice @_,0,2; | ||||
1492 | |||||
1493 | my $scratch; | ||||
1494 | $self->_handle_element_start(($scratch=$name), shift @_); | ||||
1495 | |||||
1496 | while (@_) { | ||||
1497 | my $x = shift; | ||||
1498 | if (ref($x)) { | ||||
1499 | &_traverse_treelet_bit($self, @$x); | ||||
1500 | } else { | ||||
1501 | $x .= shift while @_ && !ref($_[0]); | ||||
1502 | $self->_handle_text($x); | ||||
1503 | } | ||||
1504 | } | ||||
1505 | |||||
1506 | $self->_handle_element_end($scratch=$name); | ||||
1507 | return; | ||||
1508 | } | ||||
1509 | |||||
1510 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1511 | |||||
1512 | sub _closers_for_all_curr_open { | ||||
1513 | my $self = $_[0]; | ||||
1514 | my @closers; | ||||
1515 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { | ||||
1516 | my @copy = @$still_open; | ||||
1517 | $copy[1] = {%{ $copy[1] }}; | ||||
1518 | #$copy[1]{'start_line'} = -1; | ||||
1519 | if($copy[0] eq '=for') { | ||||
1520 | $copy[0] = '=end'; | ||||
1521 | } elsif($copy[0] eq '=over') { | ||||
1522 | $self->whine( | ||||
1523 | $still_open->[1]{start_line} , | ||||
1524 | "=over without closing =back" | ||||
1525 | ); | ||||
1526 | |||||
1527 | $copy[0] = '=back'; | ||||
1528 | } else { | ||||
1529 | die "I don't know how to auto-close an open $copy[0] region"; | ||||
1530 | } | ||||
1531 | |||||
1532 | unless( @copy > 2 ) { | ||||
1533 | push @copy, $copy[1]{'target'}; | ||||
1534 | $copy[-1] = '' unless defined $copy[-1]; | ||||
1535 | # since =over's don't have targets | ||||
1536 | } | ||||
1537 | |||||
1538 | $copy[1]{'fake-closer'} = 1; | ||||
1539 | |||||
1540 | DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; | ||||
1541 | unshift @closers, \@copy; | ||||
1542 | } | ||||
1543 | return @closers; | ||||
1544 | } | ||||
1545 | |||||
1546 | #-------------------------------------------------------------------------- | ||||
1547 | |||||
1548 | sub _verbatim_format { | ||||
1549 | my($it, $p) = @_; | ||||
1550 | |||||
1551 | my $formatting; | ||||
1552 | |||||
1553 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines | ||||
1554 | DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; | ||||
1555 | $p->[$i] .= "\n"; | ||||
1556 | # Unlike with simple Verbatim blocks, we don't end up just doing | ||||
1557 | # a join("\n", ...) on the contents, so we have to append a | ||||
1558 | # newline to ever line, and then nix the last one later. | ||||
1559 | } | ||||
1560 | |||||
1561 | if( DEBUG > 4 ) { | ||||
1562 | print "<<\n"; | ||||
1563 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines | ||||
1564 | print "_verbatim_format $i: $p->[$i]"; | ||||
1565 | } | ||||
1566 | print ">>\n"; | ||||
1567 | } | ||||
1568 | |||||
1569 | for(my $i = $#$p; $i > 2; $i--) { | ||||
1570 | # work backwards over the lines, except the first (#2) | ||||
1571 | |||||
1572 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s | ||||
1573 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; | ||||
1574 | # look at a formatty line preceding a nonformatty one | ||||
1575 | DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; | ||||
1576 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { | ||||
1577 | DEBUG > 5 and print " It's a formatty line. ", | ||||
1578 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; | ||||
1579 | |||||
1580 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { | ||||
1581 | DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; | ||||
1582 | next; | ||||
1583 | } else { | ||||
1584 | DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; | ||||
1585 | } | ||||
1586 | } else { | ||||
1587 | DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; | ||||
1588 | next; | ||||
1589 | } | ||||
1590 | |||||
1591 | # A formatty line has to have #: in the first two columns, and uses | ||||
1592 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. | ||||
1593 | # Example: | ||||
1594 | # What do you want? i like pie. [or whatever] | ||||
1595 | # #:^^^^^^^^^^^^^^^^^ ///////////// | ||||
1596 | |||||
1597 | |||||
1598 | DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; | ||||
1599 | |||||
1600 | $formatting = ' ' . $1; | ||||
1601 | $formatting =~ s/\s+$//s; # nix trailing whitespace | ||||
1602 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op | ||||
1603 | splice @$p,$i,1; # remove this line | ||||
1604 | $i--; # don't consider next line | ||||
1605 | next; | ||||
1606 | } | ||||
1607 | |||||
1608 | if( length($formatting) >= length($p->[$i-1]) ) { | ||||
1609 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; | ||||
1610 | } else { | ||||
1611 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); | ||||
1612 | } | ||||
1613 | # Make $formatting and the previous line be exactly the same length, | ||||
1614 | # with $formatting having a " " as the last character. | ||||
1615 | |||||
1616 | DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; | ||||
1617 | |||||
1618 | |||||
1619 | my @new_line; | ||||
1620 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { | ||||
1621 | #print "Format matches $1\n"; | ||||
1622 | |||||
1623 | if($2) { | ||||
1624 | #print "SKIPPING <$2>\n"; | ||||
1625 | push @new_line, | ||||
1626 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); | ||||
1627 | } else { | ||||
1628 | #print "SNARING $+\n"; | ||||
1629 | push @new_line, [ | ||||
1630 | ( | ||||
1631 | $3 ? 'VerbatimB' : | ||||
1632 | $4 ? 'VerbatimI' : | ||||
1633 | $5 ? 'VerbatimBI' : die("Should never get called") | ||||
1634 | ), {}, | ||||
1635 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) | ||||
1636 | ]; | ||||
1637 | #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; | ||||
1638 | } | ||||
1639 | } | ||||
1640 | my @nixed = | ||||
1641 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line | ||||
1642 | DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; | ||||
1643 | |||||
1644 | DEBUG > 6 and print "New version of the above line is these tokens (", | ||||
1645 | scalar(@new_line), "):", | ||||
1646 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; | ||||
1647 | $i--; # So the next line we scrutinize is the line before the one | ||||
1648 | # that we just went and formatted | ||||
1649 | } | ||||
1650 | |||||
1651 | $p->[0] = 'VerbatimFormatted'; | ||||
1652 | |||||
1653 | # Collapse adjacent text nodes, just for kicks. | ||||
1654 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last | ||||
1655 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { | ||||
1656 | DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; | ||||
1657 | $p->[$i] .= splice @$p, $i+1, 1; # merge | ||||
1658 | --$i; # and back up | ||||
1659 | } | ||||
1660 | } | ||||
1661 | |||||
1662 | # Now look for the last text token, and remove the terminal newline | ||||
1663 | for( my $i = $#$p; $i >= 2; $i-- ) { | ||||
1664 | # work backwards over the tokens, even the first | ||||
1665 | if( !ref($p->[$i]) ) { | ||||
1666 | if($p->[$i] =~ s/\n$//s) { | ||||
1667 | DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; | ||||
1668 | } else { | ||||
1669 | DEBUG > 5 and print | ||||
1670 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; | ||||
1671 | } | ||||
1672 | last; # we only want the next one | ||||
1673 | } | ||||
1674 | } | ||||
1675 | |||||
1676 | return; | ||||
1677 | } | ||||
1678 | |||||
1679 | |||||
1680 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1681 | |||||
1682 | |||||
1683 | sub _treelet_from_formatting_codes { | ||||
1684 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. | ||||
1685 | # Like [ '~Top', {'start_line' => $start_line}, | ||||
1686 | # "I like ", | ||||
1687 | # [ 'B', {}, "pie" ], | ||||
1688 | # "!" | ||||
1689 | # ] | ||||
1690 | |||||
1691 | my($self, $para, $start_line, $preserve_space) = @_; | ||||
1692 | |||||
1693 | my $treelet = ['~Top', {'start_line' => $start_line},]; | ||||
1694 | |||||
1695 | unless ($preserve_space || $self->{'preserve_whitespace'}) { | ||||
1696 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. | ||||
1697 | $para =~ s/ $//; | ||||
1698 | $para =~ s/^ //; | ||||
1699 | } | ||||
1700 | |||||
1701 | # Only apparent problem the above code is that N<< >> turns into | ||||
1702 | # N<< >>. But then, word wrapping does that too! So don't do that! | ||||
1703 | |||||
1704 | my @stack; | ||||
1705 | my @lineage = ($treelet); | ||||
1706 | my $raw = ''; # raw content of L<> fcode before splitting/processing | ||||
1707 | # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed | ||||
1708 | # into just 1 ' '. Is this the regex's doing or 'raw's? | ||||
1709 | my $inL = 0; | ||||
1710 | |||||
1711 | DEBUG > 4 and print "Paragraph:\n$para\n\n"; | ||||
1712 | |||||
1713 | # Here begins our frightening tokenizer RE. The following regex matches | ||||
1714 | # text in four main parts: | ||||
1715 | # | ||||
1716 | # * Start-codes. The first alternative matches C< or C<<, the latter | ||||
1717 | # followed by some whitespace. $1 will hold the entire start code | ||||
1718 | # (including any space following a multiple-angle-bracket delimiter), | ||||
1719 | # and $2 will hold only the additional brackets past the first in a | ||||
1720 | # multiple-bracket delimiter. length($2) + 1 will be the number of | ||||
1721 | # closing brackets we have to find. | ||||
1722 | # | ||||
1723 | # * Closing brackets. Match some amount of whitespace followed by | ||||
1724 | # multiple close brackets. The logic to see if this closes anything | ||||
1725 | # is down below. Note that in order to parse C<< >> correctly, we | ||||
1726 | # have to use look-behind (?<=\s\s), since the match of the starting | ||||
1727 | # code will have consumed the whitespace. | ||||
1728 | # | ||||
1729 | # * A single closing bracket, to close a simple code like C<>. | ||||
1730 | # | ||||
1731 | # * Something that isn't a start or end code. We have to be careful | ||||
1732 | # about accepting whitespace, since perlpodspec says that any whitespace | ||||
1733 | # before a multiple-bracket closing delimiter should be ignored. | ||||
1734 | # | ||||
1735 | while($para =~ | ||||
1736 | m/\G | ||||
1737 | (?: | ||||
1738 | # Match starting codes, including the whitespace following a | ||||
1739 | # multiple-delimiter start code. $1 gets the whole start code and | ||||
1740 | # $2 gets all but one of the <s in the multiple-bracket case. | ||||
1741 | ([A-Z]<(?:(<+)\s+)?) | ||||
1742 | | | ||||
1743 | # Match multiple-bracket end codes. $3 gets the whitespace that | ||||
1744 | # should be discarded before an end bracket but kept in other cases | ||||
1745 | # and $4 gets the end brackets themselves. | ||||
1746 | (\s+|(?<=\s\s))(>{2,}) | ||||
1747 | | | ||||
1748 | (\s?>) # $5: simple end-codes | ||||
1749 | | | ||||
1750 | ( # $6: stuff containing no start-codes or end-codes | ||||
1751 | (?: | ||||
1752 | [^A-Z\s>] | ||||
1753 | | | ||||
1754 | (?: | ||||
1755 | [A-Z](?!<) | ||||
1756 | ) | ||||
1757 | | | ||||
1758 | # whitespace is ok, but we don't want to eat the whitespace before | ||||
1759 | # a multiple-bracket end code. | ||||
1760 | # NOTE: we may still have problems with e.g. S<< >> | ||||
1761 | (?: | ||||
1762 | \s(?!\s*>{2,}) | ||||
1763 | ) | ||||
1764 | )+ | ||||
1765 | ) | ||||
1766 | ) | ||||
1767 | /xgo | ||||
1768 | ) { | ||||
1769 | DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; | ||||
1770 | if(defined $1) { | ||||
1771 | if(defined $2) { | ||||
1772 | DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; | ||||
1773 | push @stack, length($2) + 1; | ||||
1774 | # length of the necessary complex end-code string | ||||
1775 | } else { | ||||
1776 | DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; | ||||
1777 | push @stack, 0; # signal that we're looking for simple | ||||
1778 | } | ||||
1779 | push @lineage, [ substr($1,0,1), {}, ]; # new node object | ||||
1780 | push @{ $lineage[-2] }, $lineage[-1]; | ||||
1781 | if ('L' eq substr($1,0,1)) { | ||||
1782 | $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator | ||||
1783 | $inL = 1; | ||||
1784 | } else { | ||||
1785 | $raw .= $1 if $inL; | ||||
1786 | } | ||||
1787 | |||||
1788 | } elsif(defined $4) { | ||||
1789 | DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; | ||||
1790 | # This is where it gets messy... | ||||
1791 | if(! @stack) { | ||||
1792 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. | ||||
1793 | DEBUG > 4 and print " But it's really just stuff.\n"; | ||||
1794 | push @{ $lineage[-1] }, $3, $4; | ||||
1795 | next; | ||||
1796 | } elsif(!$stack[-1]) { | ||||
1797 | # We saw " >>>>" but needed only ">". Back pos up. | ||||
1798 | DEBUG > 4 and print " And that's more than we needed to close simple.\n"; | ||||
1799 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. | ||||
1800 | pos($para) = pos($para) - length($4) + 1; | ||||
1801 | } elsif($stack[-1] == length($4)) { | ||||
1802 | # We found " >>>>", and it was exactly what we needed. Commonest case. | ||||
1803 | DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; | ||||
1804 | } elsif($stack[-1] < length($4)) { | ||||
1805 | # We saw " >>>>" but needed only " >>". Back pos up. | ||||
1806 | DEBUG > 4 and print " And that's more than we needed to close complex.\n"; | ||||
1807 | pos($para) = pos($para) - length($4) + $stack[-1]; | ||||
1808 | } else { | ||||
1809 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! | ||||
1810 | DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; | ||||
1811 | push @{ $lineage[-1] }, $3, $4; | ||||
1812 | next; | ||||
1813 | } | ||||
1814 | #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; | ||||
1815 | |||||
1816 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1817 | # Keep the element from being childless | ||||
1818 | |||||
1819 | pop @stack; | ||||
1820 | pop @lineage; | ||||
1821 | |||||
1822 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1823 | $inL = 0; | ||||
1824 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1825 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1826 | } | ||||
1827 | } | ||||
1828 | $raw .= $3.$4 if $inL; | ||||
1829 | |||||
1830 | } elsif(defined $5) { | ||||
1831 | DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n"; | ||||
1832 | |||||
1833 | if(@stack and ! $stack[-1]) { | ||||
1834 | # We're indeed expecting a simple end-code | ||||
1835 | DEBUG > 4 and print " It's indeed an end-code.\n"; | ||||
1836 | |||||
1837 | if(length($5) == 2) { # There was a space there: " >" | ||||
1838 | push @{ $lineage[-1] }, ' '; | ||||
1839 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element | ||||
1840 | push @{ $lineage[-1] }, ''; # keep it from being really childless | ||||
1841 | } | ||||
1842 | |||||
1843 | pop @stack; | ||||
1844 | pop @lineage; | ||||
1845 | } else { | ||||
1846 | DEBUG > 4 and print " It's just stuff.\n"; | ||||
1847 | push @{ $lineage[-1] }, $5; | ||||
1848 | } | ||||
1849 | |||||
1850 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1851 | $inL = 0; | ||||
1852 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1853 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1854 | } | ||||
1855 | } | ||||
1856 | $raw .= $5 if $inL; | ||||
1857 | |||||
1858 | } elsif(defined $6) { | ||||
1859 | DEBUG > 3 and print "Found stuff \"$6\"\n"; | ||||
1860 | push @{ $lineage[-1] }, $6; | ||||
1861 | $raw .= $6 if $inL; | ||||
1862 | # XXX does not capture multiplace whitespaces -- 'raw' ends up with | ||||
1863 | # at most 1 leading/trailing whitespace, why not all of it? | ||||
1864 | |||||
1865 | } else { | ||||
1866 | # should never ever ever ever happen | ||||
1867 | DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; | ||||
1868 | die "SPORK 512512!"; | ||||
1869 | } | ||||
1870 | } | ||||
1871 | |||||
1872 | if(@stack) { # Uhoh, some sequences weren't closed. | ||||
1873 | my $x= "..."; | ||||
1874 | while(@stack) { | ||||
1875 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1876 | # Hmmmmm! | ||||
1877 | |||||
1878 | my $code = (pop @lineage)->[0]; | ||||
1879 | my $ender_length = pop @stack; | ||||
1880 | if($ender_length) { | ||||
1881 | --$ender_length; | ||||
1882 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); | ||||
1883 | } else { | ||||
1884 | $x = $code . "<$x>"; | ||||
1885 | } | ||||
1886 | } | ||||
1887 | DEBUG > 1 and print "Unterminated $x sequence\n"; | ||||
1888 | $self->whine($start_line, | ||||
1889 | "Unterminated $x sequence", | ||||
1890 | ); | ||||
1891 | } | ||||
1892 | |||||
1893 | return $treelet; | ||||
1894 | } | ||||
1895 | |||||
1896 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1897 | |||||
1898 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) | ||||
1899 | return stringify_lol($_[1]); | ||||
1900 | } | ||||
1901 | |||||
1902 | sub stringify_lol { # function: stringify_lol($lol) | ||||
1903 | my $string_form = ''; | ||||
1904 | _stringify_lol( $_[0] => \$string_form ); | ||||
1905 | return $string_form; | ||||
1906 | } | ||||
1907 | |||||
1908 | sub _stringify_lol { # the real recursor | ||||
1909 | my($lol, $to) = @_; | ||||
1910 | for(my $i = 2; $i < @$lol; ++$i) { | ||||
1911 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { | ||||
1912 | _stringify_lol( $lol->[$i], $to); # recurse! | ||||
1913 | } else { | ||||
1914 | $$to .= $lol->[$i]; | ||||
1915 | } | ||||
1916 | } | ||||
1917 | return; | ||||
1918 | } | ||||
1919 | |||||
1920 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1921 | |||||
1922 | sub _dump_curr_open { # return a string representation of the stack | ||||
1923 | my $curr_open = $_[0]{'curr_open'}; | ||||
1924 | |||||
1925 | return '[empty]' unless @$curr_open; | ||||
1926 | return join '; ', | ||||
1927 | map {; | ||||
1928 | ($_->[0] eq '=for') | ||||
1929 | ? ( ($_->[1]{'~really'} || '=over') | ||||
1930 | . ' ' . $_->[1]{'target'}) | ||||
1931 | : $_->[0] | ||||
1932 | } | ||||
1933 | @$curr_open | ||||
1934 | ; | ||||
1935 | } | ||||
1936 | |||||
1937 | ########################################################################### | ||||
1938 | 1 | 7µs | my %pretty_form = ( | ||
1939 | "\a" => '\a', # ding! | ||||
1940 | "\b" => '\b', # BS | ||||
1941 | "\e" => '\e', # ESC | ||||
1942 | "\f" => '\f', # FF | ||||
1943 | "\t" => '\t', # tab | ||||
1944 | "\cm" => '\cm', | ||||
1945 | "\cj" => '\cj', | ||||
1946 | "\n" => '\n', # probably overrides one of either \cm or \cj | ||||
1947 | '"' => '\"', | ||||
1948 | '\\' => '\\\\', | ||||
1949 | '$' => '\\$', | ||||
1950 | '@' => '\\@', | ||||
1951 | '%' => '\\%', | ||||
1952 | '#' => '\\#', | ||||
1953 | ); | ||||
1954 | |||||
1955 | sub pretty { # adopted from Class::Classless | ||||
1956 | # Not the most brilliant routine, but passable. | ||||
1957 | # Don't give it a cyclic data structure! | ||||
1958 | my @stuff = @_; # copy | ||||
1959 | my $x; | ||||
1960 | my $out = | ||||
1961 | # join ",\n" . | ||||
1962 | join ", ", | ||||
1963 | map {; | ||||
1964 | if(!defined($_)) { | ||||
1965 | "undef"; | ||||
1966 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { | ||||
1967 | $x = "[ " . pretty(@$_) . " ]" ; | ||||
1968 | $x; | ||||
1969 | } elsif(ref($_) eq 'SCALAR') { | ||||
1970 | $x = "\\" . pretty($$_) ; | ||||
1971 | $x; | ||||
1972 | } elsif(ref($_) eq 'HASH') { | ||||
1973 | my $hr = $_; | ||||
1974 | $x = "{" . join(", ", | ||||
1975 | map(pretty($_) . '=>' . pretty($hr->{$_}), | ||||
1976 | sort keys %$hr ) ) . "}" ; | ||||
1977 | $x; | ||||
1978 | } elsif(!length($_)) { q{''} # empty string | ||||
1979 | } elsif( | ||||
1980 | $_ eq '0' # very common case | ||||
1981 | or( | ||||
1982 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s | ||||
1983 | and $_ ne '-0' # the strange case that that RE lets thru | ||||
1984 | ) | ||||
1985 | ) { $_; | ||||
1986 | } else { | ||||
1987 | if( chr(65) eq 'A' ) { | ||||
1988 | s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> | ||||
1989 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
1990 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
1991 | } else { | ||||
1992 | # We're in some crazy non-ASCII world! | ||||
1993 | s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> | ||||
1994 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
1995 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
1996 | } | ||||
1997 | qq{"$_"}; | ||||
1998 | } | ||||
1999 | } @stuff; | ||||
2000 | # $out =~ s/\n */ /g if length($out) < 75; | ||||
2001 | return $out; | ||||
2002 | } | ||||
2003 | |||||
2004 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
2005 | |||||
2006 | # A rather unsubtle method of blowing away all the state information | ||||
2007 | # from a parser object so it can be reused. Provided as a utility for | ||||
2008 | # backward compatibility in Pod::Man, etc. but not recommended for | ||||
2009 | # general use. | ||||
2010 | |||||
2011 | sub reinit { | ||||
2012 | my $self = shift; | ||||
2013 | foreach (qw(source_dead source_filename doc_has_started | ||||
2014 | start_of_pod_block content_seen last_was_blank paras curr_open | ||||
2015 | line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen | ||||
2016 | Title)) { | ||||
2017 | |||||
2018 | delete $self->{$_}; | ||||
2019 | } | ||||
2020 | } | ||||
2021 | |||||
2022 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
2023 | 1 | 7µs | 1; | ||
2024 |